Example #1
from pyaccumulo import Accumulo  # assumed: the pyaccumulo proxy client

def printTableDB(table):
    """Display the data in the given Accumulo table."""
    conn = Accumulo(host="localhost",
                    port=50096,
                    user="******",
                    password="******")
    for entry in conn.scan(table):
        print(entry.row, entry.cf, entry.cq, entry.cv, entry.ts, entry.val)
    conn.close()
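
A minimal invocation sketch; the table name below is a placeholder and assumes an Accumulo proxy is reachable on the port above:

printTableDB("pythontest")  # hypothetical table name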
# Assumed imports for this snippet: pyaccumulo, PyCrypto, and pyOpenSSL.
# EzRPCertStoreException is defined elsewhere in the surrounding codebase.
import base64
import logging

import OpenSSL.crypto
from Crypto.Hash import SHA256
from Crypto.PublicKey import RSA
from Crypto.Signature import PKCS1_v1_5
from pyaccumulo import Accumulo, Mutation, Range


class EzRPCertStore(object):
    """
    Wrapper class around the underlying database store that holds server
    certificates for the reverse proxy.
    """

    def __init__(self, host='localhost', port=42424, user='******',
                 password='******', table='ezfrontend', privateKey=None,
                 logger=None):
        self.__table = table
        self.__signer = None
        self.__dbConnection = None
        self.__cf = "pfx"
        self.__cq = "enc"

        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())
        
        if privateKey is not None:
            self.__updateSigner(privateKey)

        self.__connectToAccumulo(host, port, user, password)


    def __connectToAccumulo(self, host, port, user, password):
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))


    def __updateSigner(self, privateKey):
        with open(privateKey) as keyFile:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(keyFile.read()))
            self.__logger.info('Updated signer for CertStore')


    def __ensureTable(self):
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
            if not self.__dbConnection.table_exists(self.__table):
                self.__logger.error('Unable to ensure DB table exists in the Store.')
                raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')


    def _generatePassword(self, serverName):
        """Derive a per-server PKCS#12 password, signing it when a key is configured."""
        password = '******'  # salt
        
        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)

        return password


    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)
        
        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)
        
        return pfx.export(passphrase=password)


    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keyContents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM, p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, p12.get_certificate())
        return certContents, keyContents


    def put(self, serverName, certContents, keyContents):
        self.__ensureTable()

        writer = self.__dbConnection.create_batch_writer(self.__table)        
        value = self._generatePkcs12(serverName, certContents, keyContents, self._generatePassword(serverName))
    
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)


    def get(self, serverName):
        self.__ensureTable()
        
        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName, self._generatePassword(serverName))
        return None, None


    def remove(self, serverName):
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)


    def exists(self, serverName):
        self.__ensureTable()
        
        # use a single-row range to narrow our scan
        scan_range = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                           erow=serverName, ecf=self.__cf, ecq=self.__cq)

        for entry in self.__dbConnection.scan(self.__table, scanrange=scan_range):
            if entry.row == serverName:
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
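
A hedged usage sketch for the cert store above; the host/port, table, key path, and PEM strings are placeholders, not the project's real configuration:

# Sketch only: all values below are hypothetical.
store = EzRPCertStore(host='localhost', port=42424, table='ezfrontend',
                      privateKey='/path/to/signing_key.pem')
store.put('proxy.example.com', certPemContents, keyPemContents)  # PEM strings
if store.exists('proxy.example.com'):
    cert, key = store.get('proxy.example.com')
store.remove('proxy.example.com')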
# Note: this snippet assumes project-local helpers (sanitize, generate_data,
# write_data, write_and_sign_data, verify_data, AccumuloSigner, new_config,
# SIGNATURE_FUNCTIONS, ALL_SIGNATURES, SUPPORTED_SIGNATURES, test_ids,
# test_pki) are importable from the surrounding signing/benchmarking package.
import time

from pyaccumulo import Accumulo


class Benchmarker(object):

    BENCHMARKS = [(100, 10), (500, 50), (1000, 100), (5000, 500),
                  (10000, 1000)]

    FANCY_BENCHMARKS = [(2**i, 2**(i - 1)) for i in range(2, 14)]

    def __init__(self,
                 host="localhost",
                 port=42424,
                 user="******",
                 password="******",
                 num_trials=100,
                 filename='default_file.txt',
                 seed=None,
                 signer_ids=test_ids,
                 pki=test_pki):
        self.conn = Accumulo(host=host,
                             port=port,
                             user=user,
                             password=password)
        self.num_trials = num_trials
        self.filename = filename
        self.seed = seed
        self.signer_ids = signer_ids
        self.pki = pki

    def run_test(self,
                 table="test_table_5",
                 default_vis="default",
                 num_entries=100,
                 num_rows=15,
                 signClassName='RSASSA_PKCS1-v1_5',
                 write=True,
                 benchmark=False):
        """ Runs one iteration of the signature test. If benchmark is set to
            True, returns the lengths of time it took to sign all the entries
            and the time it took to verify all the entries.
        """

        table = sanitize(table)
        seed = self.seed

        if signClassName == 'ALL':
            for signClass in ALL_SIGNATURES:
                self.run_test(table + '_' + sanitize(signClass.name),
                              default_vis, num_entries, num_rows,
                              signClass.name, write, benchmark)
            return

        signClass = SIGNATURE_FUNCTIONS[signClassName]

        pubkey, privkey = signClass.test_keys()

        if write:
            signer = AccumuloSigner(privkey, sig_f=signClass)

            if not seed:
                # set a new seed if one wasn't specified
                seed = str(time.time())

            generate_data(self.filename,
                          seed,
                          default_vis=default_vis,
                          num_entries=num_entries,
                          num_rows=num_rows)

            sout = write_and_sign_data(self.filename, self.conn, table, signer,
                                       benchmark)

        vout = verify_data(self.conn, table, pubkey, benchmark)

        if benchmark:
            # benchmark timing assumes write=True; sout is only set on the write path
            sign_start, sign_end = sout
            verif_success, verif_start, verif_end = vout

            print "Time taken to sign: %s" % str(sign_end - sign_start)
            print "Time taken to verify: %s" % str(verif_end - verif_start)

            return sign_end - sign_start, verif_end - verif_start

    def run_benchmarks(self,
                       table_prefix="benchmarking",
                       default_vis="default"):
        """ Benchmarks each different signature class on a variety of table
            sizes, measuring the time taken to sign & verify all entries of each
            table size with each signature algorithm.
        """

        table_prefix = sanitize(table_prefix)

        for entries, rows in self.BENCHMARKS:
            print "==============================================================="
            print "Current benchmark: %d entries over %d rows" % (entries,
                                                                  rows)
            print "==============================================================="
            print
            for signClass in SUPPORTED_SIGNATURES:
                table = "%s_%s_e%d_r%d" % (
                    table_prefix, sanitize(signClass.name), entries, rows)
                print "Benchmarking %s" % (sanitize(signClass.name))
                self.run_test(table,
                              default_vis,
                              entries,
                              rows,
                              signClass.name,
                              write=True,
                              benchmark=True)
                print

    def run_fancy_benchmarks(self,
                             table_prefix="benchmarking",
                             default_vis="default",
                             resfile="benchmark_results.csv"):
        """ Runs more benchmarks than run_benchmarks(), then writes the output
            to a file.
        """

        table_prefix = sanitize(table_prefix)

        results = []
        for entries, rows in self.FANCY_BENCHMARKS:
            print "==============================================================="
            print "Current benchmark: %d entries over %d rows" % (entries,
                                                                  rows)
            print "==============================================================="
            print
            classres = []
            for signClass in SUPPORTED_SIGNATURES:
                table = "%s_%s_e%d_r%d" % (
                    table_prefix, sanitize(signClass.name), entries, rows)
                print "Benchmarking %s" % (sanitize(signClass.name))
                sign_time, verif_time = self.run_test(table,
                                                      default_vis,
                                                      entries,
                                                      rows,
                                                      signClass.name,
                                                      write=True,
                                                      benchmark=True)
                classres.append((signClass.name, sign_time, verif_time))
                print
            results.append((entries, classres))

        print 'time to write to file'
        with open(resfile, 'w') as f:
            f.write('num entries,name,sign time,verification time\n')
            for num_entries, classres in results:
                for name, stime, vtime in classres:
                    f.write(','.join(
                        [str(num_entries), name,
                         str(stime),
                         str(vtime)]))
                    f.write('\n')
        print 'wrote to file'

    def full_benchmark(self,
                       table_prefix="full_benchmarking",
                       default_vis="default",
                       signClass=None,
                       num_entries=10000,
                       num_rows=1000):
        """ Either run a single benchmark (sign & verify) on one signature
            class, or run it with no signing class (just write & read) to get
            a baseline time.
        """

        table_prefix = sanitize(table_prefix)

        conn = self.conn

        if signClass:
            table = table_prefix + '_' + sanitize(signClass.name)
        else:
            table = table_prefix + '_baseline'

        if signClass:
            pubkey, privkey = signClass.test_keys()
            signer = AccumuloSigner(privkey, sig_f=signClass)
            start_time = time.clock()
            write_and_sign_data(self.filename,
                                conn,
                                table,
                                signer,
                                benchmark=False)
            end_time = time.clock()
            total_sign_time = end_time - start_time

            start_time = time.clock()
            verify_data(conn, table, pubkey, benchmark=False)
            end_time = time.clock()
            total_verif_time = end_time - start_time
        else:
            start_time = time.clock()
            write_data(self.filename, conn, table)
            end_time = time.clock()
            total_sign_time = end_time - start_time

            count = 0
            start_time = time.clock()
            for entry in conn.scan(table):
                count += 1
            end_time = time.clock()
            total_verif_time = end_time - start_time

        return (total_sign_time, total_verif_time)

    def run_full_benchmarks(self,
                            table_prefix="full_benchmarking",
                            default_vis="default",
                            num_entries=10000,
                            num_rows=1000,
                            outfile='full_benchmark_out.csv'):
        """ Benchmark each signing algorithm, writing the results to a file,
            and comparing them to a baseline write & read with no signatures.
        """

        table_prefix = sanitize(table_prefix)

        n = generate_data(self.filename,
                          self.seed,
                          default_vis=default_vis,
                          num_entries=num_entries,
                          num_rows=num_rows)

        base_write_time, base_read_time = self.full_benchmark(
            table_prefix, default_vis, None, num_entries, num_rows)

        with open(outfile, 'w') as f:
            bw = (base_write_time / n) * 1000
            br = (base_read_time / n) * 1000
            f.write(','.join(['name', 'signing time', 'verification time']))
            f.write('\n')
            f.write(','.join(['baseline', str(bw), str(br)]))
            f.write('\n')
            for signClass in SUPPORTED_SIGNATURES:
                (st, vt) = self.full_benchmark(table_prefix, default_vis,
                                               signClass, num_entries,
                                               num_rows)

                # convert seconds for the whole batch to milliseconds
                # per element
                st = (st / n) * 1000
                vt = (vt / n) * 1000

                f.write(','.join([signClass.name, str(st), str(vt)]))
                f.write('\n')

    def fastfail_benchmark(self, table):
        """ Check how long it takes just to read each element from a table,
            to see if there's a difference because of the changed visibility
            fields in signed tables.
        """

        table = sanitize(table)

        start = time.clock()

        total = 0
        for e in self.conn.scan(table):
            total += 1
        end = time.clock()

        return end - start

    def run_fastfail_benchmarks(self,
                                table_prefix="fastfail_benchmarking",
                                default_vis="default",
                                num_rows=1000,
                                num_noisy_entries=50000,
                                num_noisy_rows=1000,
                                outfile='fastfail_benchmark_out_2.csv',
                                num_trials=100,
                                one_vis=False):
        """ Benchmark to see how much overhead there is from the signature code
            making Accumulo unable to fast-fail and cache results from
            visibility field checks.

            If one_vis is False, it will randomly generate a default visibility
            value for each field. If it is a string, that string will be treated
            as the default visibility value for each 'noise' field.
        """

        table_prefix = sanitize(table_prefix)

        seed = self.seed
        noisy_filename = 'noisy_' + self.filename

        if not seed:
            # set a new seed if one wasn't specified
            seed = str(time.time())

        if one_vis:
            print 'generating noise with one visibility field'
            generate_data(noisy_filename,
                          seed,
                          vis=False,
                          default_vis=one_vis,
                          num_entries=num_noisy_entries,
                          num_rows=num_rows)
        else:
            print 'generating noise with random visibility fields'
            generate_data(noisy_filename,
                          seed,
                          vis=True,
                          num_entries=num_noisy_entries,
                          num_rows=num_rows)

        noisy_table = 'noisy_' + table_prefix

        write_data(noisy_filename, self.conn, noisy_table)

        for sc in SUPPORTED_SIGNATURES:
            pubkey, privkey = sc.test_keys()
            signer = AccumuloSigner(privkey, sig_f=sc)
            write_and_sign_data(noisy_filename, self.conn,
                                '_'.join([table_prefix,
                                          sanitize(sc.name)]), signer)

        all_times = []

        for n in [(num_noisy_entries / 10000) * (10**i) for i in range(6)]:

            print 'n:', n

            generate_data(self.filename,
                          str(time.time()),
                          default_vis=default_vis,
                          num_entries=n,
                          num_rows=min(n, num_rows))
            write_data(self.filename, self.conn, noisy_table)

            base_time = sum([
                self.fastfail_benchmark(noisy_table) for j in range(num_trials)
            ])
            times = []

            for signClass in SUPPORTED_SIGNATURES:

                pubkey, privkey = signClass.test_keys()
                signer = AccumuloSigner(privkey, sig_f=signClass)
                table = '_'.join([table_prefix, sanitize(signClass.name)])

                write_and_sign_data(self.filename, self.conn, table, signer)

                times.append((signClass.name,
                              sum([
                                  self.fastfail_benchmark(table)
                                  for j in range(num_trials)
                              ])))

            all_times.append((n, base_time, times))

        with open(outfile, 'w') as f:
            for num_elems, base_time, trials in all_times:

                print 'Trial for %d elements. Base time: %s' % (num_elems,
                                                                str(base_time))

                f.write('%d,BASE,%s\n' % (num_elems, str(base_time)))

                for name, ttime in trials:
                    print '\t%s: %s' % (name, str(ttime))
                    f.write('%d,%s,%s\n' % (num_elems, name, str(ttime)))
                print

    def id_test(self,
                table_prefix="id_test",
                default_vis="default",
                num_entries=10000,
                num_rows=1000):

        table_prefix = sanitize(table_prefix)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()
            table = table_prefix + '_' + sanitize(signer_id)

            signer = AccumuloSigner(privkey,
                                    sig_f=sigclass,
                                    signerID=signer_id)
            write_and_sign_data(self.filename, self.conn, table, signer)
            verify_data(self.conn, table, self.pki, sigclass)

    def table_test(self,
                   table_prefix="table_test1",
                   default_vis="default",
                   num_entries=10000,
                   num_rows=1000):

        table_prefix = sanitize(table_prefix)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()

            table = table_prefix + '_' + sanitize(signer_id)

            signer = AccumuloSigner(privkey, sig_f=sigclass)
            write_and_sign_data(self.filename,
                                self.conn,
                                table,
                                signer,
                                include_table=True)
            verif_key, _ = self.pki.get_verifying_key(signer_id)
            verify_data(self.conn, table, verif_key, False, include_table=True)

    def location_test(self,
                      cfg_file,
                      loc,
                      table_prefix="table_test1",
                      default_vis="default",
                      num_entries=10000,
                      num_rows=1000):
        # `loc` is a location label folded into the table name (assumed parameter)

        table_prefix = sanitize(table_prefix) + '_' + sanitize(loc)

        generate_data(self.filename,
                      self.seed,
                      default_vis=default_vis,
                      num_entries=num_entries,
                      num_rows=num_rows)

        for signer_id, sigclass in self.signer_ids:

            _, privkey = sigclass.test_keys()

            table = table_prefix + '_' + sanitize(signer_id)

            conf = new_config(cfg_file, self.conn)

            signer = AccumuloSigner(privkey, sig_f=sigclass, conf=conf)
            write_and_sign_data(self.filename, self.conn, table, signer)
            verif_key, _ = self.pki.get_verifying_key(signer_id)
            verify_data(self.conn, table, verif_key, False, conf=conf)
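
A hedged driver sketch for the benchmarker; the proxy coordinates, table names, and trial counts are placeholders:

# Sketch only: assumes a reachable Accumulo proxy and the project helpers noted above.
b = Benchmarker(host='localhost', port=42424, num_trials=10,
                filename='bench_data.txt')
b.run_test(table='smoke_test', num_entries=100, num_rows=15,
           signClassName='RSASSA_PKCS1-v1_5', write=True, benchmark=True)
b.run_benchmarks(table_prefix='benchmarking')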
Example #4
# Assumed imports: pyaccumulo client plus a local `settings` module that
# holds the proxy connection parameters.
from pyaccumulo import Accumulo, Mutation, Range
import settings

table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
for num in range(1, 100):
    label = '%03d' % num
    mut = Mutation('r_%s' % label)
    mut.put(cf='cf_%s' % label, cq='cq1', val='value_%s' % label)
    mut.put(cf='cf_%s' % label, cq='cq2', val='value_%s' % label)
    wr.add_mutation(mut)
wr.close()


print "Rows 001 through 003 ..."
for entry in conn.scan(table, scanrange=Range(srow='r_001', erow='r_003'), cols=[]):
    print entry

print "Rows 001 and 011 ..."
for entry in conn.batch_scan(table, scanranges=[Range(srow='r_001', erow='r_001'), Range(srow='r_011', erow='r_011')]):
    print entry

conn.close()
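
Scans can also be restricted to specific column families and qualifiers via the cols parameter (a list of [cf] or [cf, cq] pairs, as the store classes above use). A small sketch against the same table; it would need to run before the conn.close() call above:

# Sketch: column-restricted scan (run before conn.close()).
for entry in conn.scan(table, cols=[['cf_001'], ['cf_002', 'cq1']]):
    print entry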
Example #5
# Assumed setup for this snippet: a pyaccumulo connection and a test table;
# the names below are placeholders.
from pyaccumulo import Accumulo, Mutation
from pyaccumulo.iterators import SummingArrayCombiner, MinCombiner, MaxCombiner

conn = Accumulo(host="localhost", port=42424, user="******", password="******")
table = "combiner_test"
if not conn.table_exists(table):
    conn.create_table(table)

sumarray = SummingArrayCombiner(priority=11)
sumarray.add_column("histo")
sumarray.attach(conn, table)

mincom = MinCombiner(priority=12)
mincom.add_column("min")
mincom.attach(conn, table)

maxcom = MaxCombiner(priority=13)
maxcom.add_column("max")
maxcom.attach(conn, table)

wr = conn.create_batch_writer(table)

for num in range(0, 1000):
    m = Mutation("row")
    m.put(cf="sum", cq="cq", val="%d" % num)
    m.put(cf="count", cq="cq", val="%d" % 1)
    m.put(cf="min", cq="cq", val="%d" % num)
    m.put(cf="max", cq="cq", val="%d" % num)
    m.put(cf="histo",
          cq="cq",
          val=",".join([str(x) for x in [1, 2, 3, 4, 5, 6, 7, 8, 9]]))

    wr.add_mutation(m)
wr.close()

for e in conn.scan(table):
    print(e)

conn.close()
# Assumed imports for this snippet: stdlib modules plus the pyaccumulo client.
import array
import logging
import math

from pyaccumulo import Accumulo, Mutation, Range


class EzRPStaticStore(object):

    '''
    Class to save and retrieve static content from Accumulo.
    cf = "static"                   For all rows
    cq = "hash"                     Stores the hash value of the static file
    cq = "nofchunks"                Stores the number of chunks needed to store the static file
    cq = "chunk_000" .. "chunk_nnn" Stores the chunks of the static file
    '''
    def __init__(self, host="localhost", port=42424, user='******',
                 password='******', chunk_size=int(5 * 1048576),  # 5 MiB default
                 logger=None):
        self.__host = host
        self.__port = port
        self.__user = user
        self.__password = password
        self.__table = 'ezfrontend'
        self.__cf = 'static'
        self.__connection = None

        if logger is not None:
            self.__log = logger
        else:
            self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__log.addHandler(logging.NullHandler())

        self.__chunk_size = int(chunk_size)
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def _connect(self, host, port, user, password):
        try:
            self.__connection = Accumulo(host, port, user, password)
            self.__log.debug('Connected to StaticFile Store')
        except Exception as e:
            self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
            raise Exception('Error while connecting to StaticFile Store: %s' % str(e))

    def _ensureTableExists(self):
        '''
        Make sure that the table exists before any other operation.
        Reconnect to Accumulo if the Connection is reset.
        '''
        if not self.__connection.table_exists(self.__table):
            self.__log.info('table "{table}" does not exist in StaticFile Store. Creating the table'.format(table=self.__table))
            self.__connection.create_table(self.__table)
            if not self.__connection.table_exists(self.__table):
                self.__log.error('Unable to ensure StaticFile Store table "{table}" exists'.format(table=self.__table))
                raise Exception('StaticFile Store: Unable to ensure table "{table}" exists'.format(table=self.__table))

    def _ensureNoDuplicates(self, usrFacingUrlPrefix):
        '''
         Ensure a single copy of file for a given usrFacingUrlPrefix
        '''
        if self._getHash(usrFacingUrlPrefix) is not None:
            self.deleteFile(usrFacingUrlPrefix)

    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''
        Store the number of chunks used to hold the static content.
        '''

        chunks = int(math.ceil(length / float(self.__chunk_size)))
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="nofchunks", val=str(chunks))
        writer.add_mutation(m)
        writer.close()

    def _getNofChunks(self, usrFacingUrlPrefix):
        '''
        Get the number of chunks used to hold the static content.
        '''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return int(entry.val)
        return 0

    def _getChunks(self, data):
        '''
        Break the blob into chunks of at most chunk_size bytes; chunk_size
        must stay below maxFrameSize in the Accumulo proxy.properties.
        '''
        data_length = len(data)
        for i in range(0, data_length, self.__chunk_size):
            yield data[i:i + self.__chunk_size]

    def _putHash(self, usrFacingUrlPrefix, hash_str):
        '''
        Puts the Hash for usrFacingUrlPrefix
        '''
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", val=hash_str)
        writer.add_mutation(m)
        writer.close()

    def _getHash(self, usrFacingUrlPrefix):
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return str(entry.val)
        return None

    def reConnection(self):
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def putFile(self, usrFacingUrlPrefix, hash_str, data):
        self._ensureTableExists()
        self._ensureNoDuplicates(usrFacingUrlPrefix)
        self._putHash(usrFacingUrlPrefix, hash_str)
        data_length = len(data)
        self._putNofChunks(usrFacingUrlPrefix, data_length)
        writer = self.__connection.create_batch_writer(self.__table)
        for i, chunk in enumerate(self._getChunks(data)):
            m = Mutation(usrFacingUrlPrefix)
            m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk)
            writer.add_mutation(m)
        self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length))
        writer.close()

    def getFile(self, usrFacingUrlPrefix):
        '''
        Assembles all the chunks for this row
        '''
        self._ensureTableExists()
        data = array.array('c') # Create a byte array
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        chunks_read = 0
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]):
                if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"):
                    chunks_read += 1
                    data.extend(entry.val)

        # This code gets following error while retrieving over 96MB.  Data stops at first chunk_000
        # # java.lang.OutOfMemoryError: Java heap space
        # -XX:OnOutOfMemoryError="kill -9 %p"
        #   Executing /bin/sh -c "kill -9 32597"...
        # [1]+  Exit 137  sudo -u accumulo /opt/accumulo/current/bin/accumulo proxy -p /opt/accumulo/current/conf/proxy.properties

        # startChunk = "chunk_{number:010d}".format(number=0)
        # endChunk = "chunk_{number:010d}".format(number=chunks)
        # scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq=startChunk,
        #                    erow=usrFacingUrlPrefix, ecf=self.__cf, ecq=endChunk)
        # for entry in self.__connection.scan(self.__table, scanrange=scan_range):
        #     #self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq))
        #     if entry.cq.startswith("chunk_"):
        #         self.__log.info("getFile: row = {0} cq= {1}".format(entry.row, entry.cq))
        #         chunks_read += 1
        #         data.extend(entry.val)
        self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix))
        if chunks_read != chunks:
            self.__log.error("did not read all the chunks from StaticFile Store")
        return data.tostring() if data.buffer_info()[1] > 0 else None

    def deleteFile(self, usrFacingUrlPrefix):
        self._ensureTableExists()
        writer = self.__connection.create_batch_writer(self.__table)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", is_delete=True)
        m.put(cf=self.__cf, cq="nofchunks", is_delete=True)
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            m.put(cf=self.__cf, cq=cq, is_delete=True)
        writer.add_mutation(m)
        self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix))
        writer.close()

    def getAttributes(self):
        '''
        Yields the url prefix and hash of each entry in the table as a tuple,
        or a single (None, None) tuple if the table is empty.
        '''
        self._ensureTableExists()
        found = False
        for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]):
            found = True
            yield (entry.row, str(entry.val))
        if not found:
            yield (None, None)
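
A hedged usage sketch for the static store; the URL prefix, hash string, and payload are placeholders:

# Sketch only: placeholder values, not real frontend data.
store = EzRPStaticStore(host='localhost', port=42424)
store.putFile('/static/app.css', 'deadbeef', 'body { color: black; }')
contents = store.getFile('/static/app.css')
for url, digest in store.getAttributes():
    print url, digest
store.deleteFile('/static/app.css')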
Example #7
# Assumed setup for this snippet: a pyaccumulo connection, a test table, and a
# SummingCombiner for the "sum"/"count" columns written below; names are placeholders.
from pyaccumulo import Accumulo, Mutation
from pyaccumulo.iterators import (SummingCombiner, SummingArrayCombiner,
                                  MinCombiner, MaxCombiner)

conn = Accumulo(host="localhost", port=42424, user="******", password="******")
table = "combiner_test2"
if not conn.table_exists(table):
    conn.create_table(table)

summing = SummingCombiner(priority=10)
summing.add_column("sum")
summing.add_column("count")
summing.attach(conn, table)

sumarray = SummingArrayCombiner(priority=11)
sumarray.add_column("histo")
sumarray.attach(conn, table)

mincom = MinCombiner(priority=12)
mincom.add_column("min")
mincom.attach(conn, table)

maxcom = MaxCombiner(priority=13)
maxcom.add_column("max")
maxcom.attach(conn, table)

wr = conn.create_batch_writer(table)

for num in range(0, 1000):
    m = Mutation("row")
    m.put(cf="sum", cq="cq", val="%d" % num)
    m.put(cf="count", cq="cq", val="%d" % 1)
    m.put(cf="min", cq="cq", val="%d" % num)
    m.put(cf="max", cq="cq", val="%d" % num)
    m.put(cf="histo",
          cq="cq",
          val=",".join([str(x) for x in [1, 2, 3, 4, 5, 6, 7, 8, 9]]))

    wr.add_mutation(m)
wr.close()

for e in conn.scan(table):
    print e
    
conn.close()
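
With the combiners attached, a scan of "row" returns one aggregated entry per column family; for example, the "sum" column combines to sum(range(1000)) = 499500 and "count" to 1000. A small check sketch, assuming string-encoded combiner values and run before the conn.close() call above:

# Sketch: verify scan-time aggregates (assumes string-encoded values;
# run before conn.close() above).
for e in conn.scan(table):
    if e.cf == "sum":
        assert int(e.val) == 499500   # sum(range(1000))
    elif e.cf == "count":
        assert int(e.val) == 1000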