def _check_encrypt_decrypt_mutation(encClass):
    '''
    Round-trip check for one encryption class: encrypt a two-cell
    mutation, decrypt every resulting mutation, and verify that the
    decrypted data matches the original and that the ciphertext is not
    equal to the plaintext.
    '''
    algorithm = encClass.name
    config_text = ('[colFamily]\n'
                   'key_id = ' + algorithm + '\n'
                   'cell_sections = colFamily,colQualifier\n'
                   'encryption = ' + algorithm)
    encryptor_dict = _create_encryptor_dict(stringio.StringIO(config_text))

    mut = Mutation('abcdefghijklmnopqrstuvwxyz')
    mut.put(cf='cf1', cq='cq1', cv='a&b', ts='12345', val='val1')
    mut.put(cf='cf2', cq='cq2', cv='a&b', ts='12345', val='val2')

    enc_muts = EncMutation(mut, encryptor_dict).encrypt()

    # Decrypt each encrypted mutation separately; `enc_mut` is kept as
    # the loop variable because it is inspected again after the loop.
    dec_muts = []
    for enc_mut in enc_muts:
        decrypted = EncMutation(enc_mut, encryptor_dict).decrypt()
        dec_muts.append(decrypted)

    round_trip_ok = _decrypt_mutations_equal(dec_muts, mut)
    assert_true(
        round_trip_ok,
        "Mutation is not correctly handled during encryption and decryption process"
    )

    # Compares the last encrypted mutation seen by the loop to the original.
    is_identity = _mutations_equal(enc_mut, mut)
    assert_true(not is_identity,
                "Encryption algorithm was identity function.")
def write_data(file_in, conn, table):
    """ Just writes the data without signing it.

        Each line of the input file is split on tabs into five fields:
        row, column family, column qualifier, visibility and value.
        An empty visibility field is written as None. One mutation is
        written per line.

        Arguments:

        file_in - a string denoting the path of a file
        conn - the Accumulo connection to use
        table - the table to write to
    """

    #Get the lines from the file (closed even if reading fails)
    with open(file_in, 'r') as f:
        lines = f.readlines()

    #create the table if it doesn't exist yet
    if not conn.table_exists(table):
        conn.create_table(table)

    wr = conn.create_batch_writer(table)
    try:
        for l in lines:
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose the last character of its value.
            pieces = l.rstrip('\n').split('\t')
            m = Mutation(pieces[0])
            vis = pieces[3]
            if vis == '':
                vis = None
            m.put(cf=pieces[1], cq=pieces[2], cv=vis, val=pieces[4])
            wr.add_mutation(m)
    finally:
        # Always release the batch writer, even when a line is malformed
        wr.close()
def _check_versioning(encClass):
    """
    Test the encryptions classes are properly dealing with versions.
    Versions are pulled from the DummyEncryptionPKI in encryption_pki.py

    Encrypts a mutation holding a single value and checks that the text
    after the last 'ver' marker in the encrypted value equals the
    expected version listed for encClass.name.
    """
    # Expected key version per algorithm name (each name listed once;
    # the original literal repeated the Pycrypto_AES_CFB entry).
    groundtruth = {
        "Pycrypto_AES_CFB": '3',
        "Pycrypto_AES_CBC": '1',
        "Pycrypto_AES_OFB": '3',
        "Pycrypto_AES_CTR": '1',
        "Pycrypto_AES_GCM": '2',
        "Pycrypto_AES_SIV": '1'
    }
    config = stringio.StringIO(
                        '[value]\n'+\
                        'key_id = '+ encClass.name +'\n'+\
                        'cell_sections = value\n'+\
                        'encryption = ' + encClass.name)
    encryptor_dict = _create_encryptor_dict(config)
    mut = Mutation('abcdefghijklmnopqrstuvwxyz')
    mut.put(val='val2')
    enc_muts = EncMutation(mut, encryptor_dict).encrypt()
    # A single-cell mutation must produce exactly one encrypted mutation
    assert_true(len(enc_muts) == 1)
    enc_mut = enc_muts[0]
    # The version is whatever follows the last 'ver' in the encrypted value
    assert_true(
        enc_mut.updates[0].value.rsplit('ver',
                                        1)[-1] == groundtruth[encClass.name],
        'Not grabbing the most recent version of the key')
def write_data(file_in, conn, table):
    """ Parse a tab-separated data file (as written by generate_data)
        and write every line as one mutation to the given Accumulo
        connection.

        Arguments:

        file_in - a string denoting the path of a file
        conn - the Accumulo connection to use
        table - the table to write to

    """

    # Make sure the destination table exists, then open a batch writer
    if not conn.table_exists(table):
        conn.create_table(table)
    writer = conn.create_batch_writer(table)

    # Each line carries exactly five tab-separated fields
    with open(file_in) as infile:
        for raw_line in infile:
            fields = raw_line.rstrip('\n').split('\t')
            row, col_fam, col_qual, col_vis, val = fields
            entry = Mutation(row)
            entry.put(cf=col_fam, cq=col_qual, cv=col_vis, val=val)
            writer.add_mutation(entry)

    writer.close()
Example #5
0
 def remove(self, serverName):
     """Delete the stored cert/key cell for ``serverName``.

     Ensures the table exists, writes one delete mutation for the
     configured column family/qualifier of the ``serverName`` row, and
     logs the removal at debug level.
     """
     self.__ensureTable()
     batch_writer = self.__dbConnection.create_batch_writer(self.__table)
     deletion = Mutation(serverName)
     deletion.put(cf=self.__cf, cq=self.__cq, is_delete=True)
     batch_writer.add_mutation(deletion)
     batch_writer.close()
     message = 'removed cert/key for %s from store' % serverName
     self.__logger.debug(message)
 def _putHash(self, usrFacingUrlPrefix, hash_str):
     '''
     Store hash_str under the "hash" qualifier of the
     usrFacingUrlPrefix row.
     '''
     hash_cell = Mutation(usrFacingUrlPrefix)
     hash_cell.put(cf=self.__cf, cq="hash", val=hash_str)
     batch_writer = self.__connection.create_batch_writer(self.__table)
     batch_writer.add_mutation(hash_cell)
     batch_writer.close()
Example #7
0
    def delete_attr(self, userid, attr):
        """ Remove one attribute from the attributes recorded for a user.
            Used for key revocation.

            Arguments:
            userid (string) - the ID of the user whose attribute to delete
            attr (string) - the attribute to delete from the user's list
        """
        # Row is the user, column family is the attribute being revoked
        revocation = Mutation(userid)
        revocation.put(cf=attr, is_delete=True)
        self.conn.write(self.user_attr_table, revocation)
    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''
        Store, under the "nofchunks" qualifier, how many chunks of the
        configured chunk size are needed to hold `length` units of content
        '''

        # Float division so that a partial final chunk rounds up
        chunk_size = float(self.__chunk_size)
        chunk_count = int(math.ceil(length / chunk_size))

        batch_writer = self.__connection.create_batch_writer(self.__table)
        count_cell = Mutation(usrFacingUrlPrefix)
        count_cell.put(cf=self.__cf, cq="nofchunks", val=str(chunk_count))
        batch_writer.add_mutation(count_cell)
        batch_writer.close()
Example #9
0
    def put(self, serverName, certContents, keyContents):
        """Store the cert/key pair for ``serverName``.

        The value written is whatever ``_generatePkcs12`` returns for the
        given contents and the password from ``_generatePassword``.
        """
        self.__ensureTable()

        batch_writer = self.__dbConnection.create_batch_writer(self.__table)
        password = self._generatePassword(serverName)
        bundle = self._generatePkcs12(serverName, certContents, keyContents, password)

        entry = Mutation(serverName)
        entry.put(cf=self.__cf, cq=self.__cq, val=bundle)
        batch_writer.add_mutation(entry)
        batch_writer.close()
        message = 'added cert/key contents for %s to store' % serverName
        self.__logger.debug(message)
Example #10
0
    def right(self, value):
        """ Record `value` as the node to the right of this one by
            writing its name to the ('child', 'right') cell of this
            node's row.

            Arguments:
            value - the EmbeddedNode object to the right of `self`
        """
        assert isinstance(value, EmbeddedNode)

        link = Mutation(self.name)
        link.put(cf='child', cq='right', val=value.name)
        owner = self.sl
        owner.conn.write(owner.table, link)
Example #11
0
    def delete_user(self, attr, user):
        """ Remove one user from the users recorded for an attribute.
            Used for key revocation.

            Arguments:

            attr : string - the attribute to delete a user from
            user : string - the user to be deleted from attr
        """
        # Row is the attribute, column family is the user being removed
        revocation = Mutation(attr)
        revocation.put(cf=user, is_delete=True)
        self.conn.write(self.attr_user_table, revocation)
def write_and_sign_data(file_in, conn, table, signer, benchmark=False,
                        include_table=False):
    """ Parse a tab-separated data file (as written by generate_data),
        sign each entry, and write it out to the given Accumulo
        connection.

        Arguments:

        file_in - a string denoting the path of a file
        conn - the Accumulo connection to use
        table - the table to write to
        signer - the Signer (as in sign.py) to sign the data with
        benchmark - whether or not to return the timing of the
                    sign-and-write loop (default: False)
        include_table - whether or not to include the name of the table
                        in the signature (default: False)

        Returns:

        If benchmark=True, returns a pair (start, end) taken from the
        common_utils.Timer that wraps the sign-and-write loop.

        Otherwise, returns nothing.
    """

    # Make sure the table exists, then open a batch writer on it
    if not conn.table_exists(table):
        conn.create_table(table)
    writer = conn.create_batch_writer(table)

    # The signer only sees the table name when the caller asks for it
    signed_table = table if include_table else None

    with open(file_in) as f:
        lines = f.readlines()

        # Only the per-line parse/sign/queue work is timed
        with common_utils.Timer() as t:
            for line in lines:
                fields = line.rstrip('\n').split('\t')
                row, col_fam, col_qual, col_vis, val = fields

                mutation = Mutation(row)
                mutation.put(cf=col_fam, cq=col_qual, cv=col_vis, val=val)

                signer.sign_mutation(mutation, table=signed_table)
                writer.add_mutation(mutation)

    writer.close()
    if benchmark:
        return (t.start, t.end)
Example #13
0
def write_and_encrypt_data(file_in, conn, table, encryptor, benchmark=False):

    #Get the lines from the file
    print "Opening file", file_in
    f = open(file_in, 'r')
    lines = f.readlines()
    f.close()
    assert lines
    print "Lines (%d) are now in memory." % len(lines)
    #create the table if it doesn't exist yet
    if not conn.table_exists(table):
        conn.create_table(table)
        print table, 'table created'
    print "Beginning write."

    wr = conn.create_batch_writer(table)
    row = '340930563???poitapeoita'
    m = None

    if benchmark:
        print "Starting encrypting..."
        start = time.clock()

    mutations = []
    for l in lines:
        pieces = l.split('\t')
        if (row != pieces[0]):
            if m:
                mutations.append(m)
            row = pieces[0]
            m = Mutation(row)
        vis = pieces[3]
        m.put(cf=pieces[1], cq=pieces[2], cv=vis, val=pieces[4][:-1])
    mutations.append(m)

    if benchmark:
        print "Starting encrypting..."
        start = time.clock()

    for m in mutations:
        wr.add_mutation(encryptor.encrypt(m))

    if benchmark:
        end = time.clock()
        print "Encrypting finished!"

    wr.close()

    print "Write completed."

    if benchmark:
        return (start, end)
 def putFile(self, usrFacingUrlPrefix, hash_str, data):
     """Store a static file under the ``usrFacingUrlPrefix`` row.

     Writes the hash and the chunk count through the sibling helpers,
     then writes each chunk yielded by ``_getChunks(data)`` under a
     zero-padded ``chunk_NNNNNNNNNN`` qualifier.
     """
     self._ensureTableExists()
     self._ensureNoDuplicates(usrFacingUrlPrefix)
     self._putHash(usrFacingUrlPrefix, hash_str)
     data_length = len(data)
     self._putNofChunks(usrFacingUrlPrefix, data_length)

     batch_writer = self.__connection.create_batch_writer(self.__table)
     for position, chunk in enumerate(self._getChunks(data)):
         qualifier = "chunk_{number:010d}".format(number=position)
         chunk_cell = Mutation(usrFacingUrlPrefix)
         chunk_cell.put(cf=self.__cf, cq=qualifier, val=chunk)
         batch_writer.add_mutation(chunk_cell)

     message = 'added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length)
     self.__log.debug(message)
     batch_writer.close()
Example #15
0
 def _delete(self, table_name, keys):
     """Queue a delete of the empty-family/empty-qualifier cell for every
     key in ``keys``; the batch writer is closed even if queuing fails.
     """
     writer = BatchWriter(conn=self.conn,
                          table=self._ns(table_name),
                          max_memory=self._max_memory,
                          latency_ms=self._latency_ms,
                          timeout_ms=self._timeout_ms,
                          threads=self._threads)
     try:
         for row_key in keys:
             tombstone = Mutation(row_key)
             tombstone.put(cf='', cq='', is_delete=True)
             writer.add_mutation(tombstone)
     finally:
         writer.close()
Example #16
0
def write_mutations(writer, shard, uuid, value, tokens):
    """Write one document entry plus its token index entries.

    The document goes to row ``uuid`` (family "e"); every token becomes
    a cell in row ``shard`` with the token as column family and ``uuid``
    as qualifier. The shard mutation is handed to the writer and
    restarted whenever it holds more than 1000 updates.
    """
    document = Mutation(uuid)
    document.put(cf="e", cq="", val=value)
    writer.add_mutation(document)

    index = Mutation(shard)
    for token in tokens:
        index.put(token, cq=uuid, val="")
        if len(index.updates) > 1000:
            writer.add_mutation(index)
            index = Mutation(shard)

    # Send whatever is left over from the last partial batch
    remaining = len(index.updates)
    if remaining > 0:
        writer.add_mutation(index)
Example #17
0
 def exportJsonDB(json_data, frameNum):
     """ Exports the JSON data to the Accumulo database

         The table is named after json_data['videoMetadata']['videoName']
         (dots replaced by underscores, non-ASCII characters dropped) and
         is created if missing. One row "row_<frameNum>" is written with:

         cf2/cq2 - the 'imageBase64' frame image
         cf3/cq3 - the 'LabeledImage' image, when present
         cf1/cq1 - the remaining data, re-serialized as JSON

         Arguments:

         json_data - JSON text holding the frame and its metadata
         frameNum - integer frame number, used to build the row key
     """
     conn = Accumulo(host="localhost",
                     port=50096,
                     user="******",
                     password="******")
     try:
         #put json data back into dictionary
         json_data_parsed = json.loads(json_data)

         #get the video name and set that as the table name
         table = json_data_parsed['videoMetadata']['videoName']
         table = table.replace('.', '_')
         table = table.encode('ascii', 'ignore')
         if not conn.table_exists(table):
             conn.create_table(table)

         #table row number is the frame number
         m = Mutation("row_%d" % frameNum)

         #saves the frame image separately from the metadata and drops
         #its base64 representation from the metadata dictionary
         m.put(cf="cf2", cq="cq2", val=json_data_parsed.pop('imageBase64'))

         #same for the labeled image, which is optional
         if 'LabeledImage' in json_data_parsed:
             m.put(cf="cf3", cq="cq3",
                   val=json_data_parsed.pop('LabeledImage'))

         #set the first column to now only the metadata.
         json_data = json.dumps(json_data_parsed)
         m.put(cf="cf1", cq="cq1", val=json_data)
         conn.write(table, m)
     finally:
         #close the connection even when parsing or writing fails
         conn.close()
def _random_mutation(default_vis='default', append_vis=None):
    """Build a single-cell mutation with random row, family, qualifier
    and value.

    The visibility is ``default_vis`` joined by '|' with ``append_vis``,
    or with a random number when ``append_vis`` is None.
    """

    def _big_random():
        return str(random.randint(0, 100000000))

    row = str(random.randint(0, 10))
    # Tuple elements are evaluated left to right: family, value, qualifier
    col, val, cq = _big_random(), _big_random(), _big_random()

    suffix = _big_random() if append_vis is None else append_vis
    cv = '|'.join([default_vis, suffix])

    result = Mutation(row)
    result.put(cf=col, cv=cv, cq=cq, val=val)

    return result
Example #19
0
def write_mutations(writer, shard, uuid, value, tokens):
    """Write a document and its token index into the ``shard`` row.

    The document is stored under family "e\\0file" with ``uuid`` as
    qualifier; each token adds an "i" family cell. The mutation is
    handed to the writer and restarted whenever it holds more than 1000
    updates.
    """
    batch = Mutation(shard)
    batch.put(cf="e\0file", cq=uuid, val=value)
    for token in tokens:
        index_qualifier = "%s\0file\0%s\0info" % (token, uuid)
        batch.put(cf="i", cq=index_qualifier, val="")
        if len(batch.updates) > 1000:
            writer.add_mutation(batch)
            batch = Mutation(shard)

    # Send whatever is left over from the last partial batch
    remaining = len(batch.updates)
    if remaining > 0:
        writer.add_mutation(batch)
Example #20
0
    def parent(self, value):
        """ Record this node's parent. `value` must be a pair of an
            EmbeddedNode and a boolean telling whether the parent is an
            upper (as opposed to left) neighbor.

            The parent is stored as '<direction>,<parent name>' in the
            ('parent', '') cell of this node's row.

            NB: `parent` is only set in `newnode()`
        """
        parnode, from_up = value

        assert isinstance(parnode, EmbeddedNode)

        direction = 'from_up' if from_up else 'from_left'
        strval = ','.join([direction, parnode.name])

        link = Mutation(self.name)
        link.put(cf='parent', cq='', val=strval)
        owner = self.sl
        owner.conn.write(owner.table, link)
    def decrypt(self):
        '''
        Returns a new mutation. Each portion of the cell that 
        has an associated encryptor is decrypted. Mostly 
        used for testing.

        Side effect: self.update_dict is replaced by the decrypted
        copy. Raises AssertionError unless exactly one row is present
        after decryption.
        '''
        # Work on a copy so the stored updates are only swapped once
        # every encryptor has run.
        dec_updates = self.update_dict.copy()
        for (cell_string, encryptor) in self.encryptor_dict.items():
            # Return value is ignored; presumably decrypt_mutation fills
            # dec_updates in place -- TODO confirm against the encryption
            # classes.
            encryptor.encryption.decrypt_mutation(self, dec_updates,
                                                  encryptor.key_container,
                                                  cell_string,
                                                  encryptor.cell_sections)

        self.update_dict = dec_updates
        #only should be one cell since each encrypted mutation only contains one cell
        assert len(self.update_dict['row']) == 1
        mut = Mutation(self.update_dict['row'][0])
        # NOTE(review): zip(*self) relies on iterating `self` yielding one
        # sequence per cell field (row, cf, cq, cv, ts, value, delete flag);
        # the iteration protocol is defined outside this block -- confirm.
        for (row, cf, cq, cv, ts, v, dc) in zip(*self):
            mut.put(cf, cq, cv, ts, v, dc)
        return mut
 def deleteFile(self, usrFacingUrlPrefix):
     """Delete a stored static file.

     One mutation on the ``usrFacingUrlPrefix`` row deletes the "hash"
     cell, the "nofchunks" cell and one ``chunk_NNNNNNNNNN`` cell for
     each of the chunks reported by ``_getNofChunks``.
     """
     self._ensureTableExists()
     batch_writer = self.__connection.create_batch_writer(self.__table)
     chunk_count = self._getNofChunks(usrFacingUrlPrefix)

     qualifiers = ["hash", "nofchunks"]
     qualifiers.extend('chunk_{number:010d}'.format(number=i)
                       for i in range(chunk_count))

     deletion = Mutation(usrFacingUrlPrefix)
     for qualifier in qualifiers:
         deletion.put(cf=self.__cf, cq=qualifier, is_delete=True)
     batch_writer.add_mutation(deletion)

     message = 'removed static file for {url}'.format(url=usrFacingUrlPrefix)
     self.__log.debug(message)
     batch_writer.close()
Example #23
0
def test_clear_table(client, direct):
    """Clearing a table removes its rows, and clearing an already empty
    table leaves it empty."""

    def assert_table_empty():
        # Any entry coming back from the scan is a failure
        for _ in direct.scan(client._test_ns("table1")):
            assert False

    client.setup_namespace({"table1": 1, "table2": 1})

    # Write two values to the same cell of one row
    seed = Mutation("row_1")
    seed.put(cf="cf1", cq="cq1", val="1")
    seed.put(cf="cf1", cq="cq1", val="2")
    direct.write(client._test_ns("table1"), seed)

    # Clearing a populated table empties it
    client.clear_table("table1")
    assert_table_empty()

    # Clearing an empty table keeps it empty
    client.clear_table("table1")
    assert_table_empty()
Example #24
0
def write_data(file_in, conn, table, benchmark=False):
    """ Just writes the data without signing it.
    """

    #Get the lines from the file
    f = open(file_in, 'r')
    lines = f.readlines()
    f.close()

    #create the table if it doesn't exist yet
    if not conn.table_exists(table):
        conn.create_table(table)

    wr = conn.create_batch_writer(table)
    m = None

    mutations = []
    for l in lines:
        pieces = l.split('\t')
        row = pieces[0]
        m = Mutation(row)
        vis = None
        m.put(cf=pieces[1], cq=pieces[2], cv=pieces[3], val=pieces[4][:-1])
        mutations.append(m)
    if benchmark:
        print "Starting writing..."
        start = time.clock()

    for m in mutations:
        wr.add_mutation(m)

    if benchmark:
        print "Done writing ..."
        end = time.clock()
    wr.close()

    if benchmark:
        return (start, end)
    def _add_signature(self, mutation, update, metadata, sig):
        """ Queue a metadata cell carrying the signature of one update.

            The row of the metadata cell is the stringified tuple
            (row, column family, column qualifier, visibility, delete
            flag) of the signed cell; its column family and qualifier
            are blank, its visibility matches the signed cell, and its
            value is '<metadata>,<sig>'. The cell is handed to
            self.update_batch.

            NB: storing signature information in a separate column family or
                qualifier would likely be more efficient for most use cases,
                but it would require nontrivial modifications to how queries
                are handled by the user. This is something we may write later
                as a separate library, but for now, we choose this slightly
                less efficient but more compositional approach.
        """
        visibility = update.colVisibility
        cell_key = str((mutation.row, update.colFamily, update.colQualifier,
                        visibility, update.deleteCell))
        payload = ','.join([metadata, sig])

        meta_mutation = Mutation(cell_key)
        meta_mutation.put(cf='', cq='', cv=visibility, val=payload)
        self.update_batch(meta_mutation)
    def encrypt(self):
        '''
        Returns a list of new mutations. Each portion of the cell that 
        has an associated encryptor is encrypted. 

        The encryption itself runs only on the first call (guarded by
        self._encrypted); later calls rebuild the mutation list from the
        already-encrypted self.update_dict.
        '''
        #only want to encrypt the values once
        if not self._encrypted:
            self._encrypted = True
            # Encrypt into a copy, then swap it in once all encryptors ran
            enc_updates = self.update_dict.copy()
            for (cell_string, encryptor) in self.encryptor_dict.items():
                enc_updates[
                    cell_string] = encryptor.encryption.encrypt_mutation(
                        self, encryptor.key_container, encryptor.cell_sections)
            self.update_dict = self._remove_unencrypted_cell_sections(
                enc_updates)

        #TODO: in the case where the row is deterministically encrypted
        # update to only produce one mutation
        # One output mutation per cell.
        # NOTE(review): zip(*self) relies on iterating `self` yielding one
        # sequence per cell field; that protocol is defined outside this
        # block -- confirm.
        muts = []
        for (row, cf, cq, cv, ts, v, dc) in zip(*self):
            mut = Mutation(row)
            mut.put(cf, cq, cv, ts, v, dc)
            muts.append(mut)
        return muts
Example #27
0
def write_mutations(writer, shard, uuid, value, tokens):
    """Write one document entry plus its token index entries.

    The document goes to row ``uuid`` (family "e"); every token becomes
    a cell in row ``shard`` with the token as column family and ``uuid``
    as qualifier. The shard mutation is handed to the writer and
    restarted whenever it holds more than 1000 updates.
    """
    doc_entry = Mutation(uuid)
    doc_entry.put(cf="e", cq="", val=value)
    writer.add_mutation(doc_entry)

    token_batch = Mutation(shard)
    for term in tokens:
        token_batch.put(term, cq=uuid, val="")
        if len(token_batch.updates) > 1000:
            writer.add_mutation(token_batch)
            token_batch = Mutation(shard)

    # Send whatever is left over from the last partial batch
    leftover = len(token_batch.updates)
    if leftover > 0:
        writer.add_mutation(token_batch)
Example #28
0
    def batch_insert(self, userid, infos):
        """ Insert the keys through the parent class, then record the
            attribute<->user links for every key info: one cell in the
            attribute-to-user table and one in the user-to-attribute
            table, each written only when not already present.
        """
        # Do a normal insert
        super(AccumuloAttrKeyStore, self).batch_insert(userid, infos)

        # Also add key information
        # NB: this can also be done inline to avoid iterating twice
        #     though the keystore infos, at the downside of more code
        #     duplication and less modularity.
        for keyinfo in infos:
            attr = keyinfo.attr
            # (table, row, column family) for both link directions
            links = ((self.attr_user_table, attr, userid),
                     (self.user_attr_table, userid, attr))
            for link_table, row, family in links:
                if entry_exists(self.conn, link_table, row, family):
                    continue
                #TODO: we could batch these writes for potentially a
                #      little bit more efficiency
                link = Mutation(row)
                link.put(cf=family, val='1')
                self.conn.write(link_table, link)
Example #29
0
    def remove_revoked_keys(self, userid, metadata, attr):
        """ Delete every stored key version matching the given revoked
            userid, metadata, and attribute, and drop the matching
            entry from the metadata store.

            Arguments:

            self - the KeyStore object to delete elements from
            userid : string - the ID of the user whose keys are being deleted
            metadata : string - the metadata of the keys to delete
            attr : string - the attribute of the keys to delete
        """
        # Key cells live in the table named after the metadata:
        #   Row   - userid
        #   CF    - attribute
        #   CQ    - version
        #   vis   - attribute (non-attr keys visible to all)
        #   value - keywrap
        revoked = self.batch_retrieve(userid, metadata, attr)

        # One mutation carries the deletes for all retrieved versions
        key_deletes = Mutation(userid)
        for keyinfo in revoked:
            version = str(keyinfo.vers)
            key_deletes.put(cf=attr, cq=version, cv=attr, is_delete=True)
        self.conn.write(metadata, key_deletes)

        # The metadata store entry has to go as well:
        #   Table - self.meta_table
        #   Row   - userid
        #   CF    - attr
        #   CQ    - metadata
        #   vis   - [empty]
        #   value - '1' (dummy value)
        meta_delete = Mutation(userid)
        meta_delete.put(cf=attr, cq=metadata, is_delete=True)
        self.conn.write(self.meta_table, meta_delete)
def _check_malformed_ciphertext_version(encClass):
    """
    Decrypting a mutation that was never encrypted (so its cells do
    not contain 'ver') must raise DecryptionException.
    """
    algorithm = encClass.name
    config_text = ('[colFamily]\n'
                   'key_id = ' + algorithm + '\n'
                   'cell_sections = colFamily,colQualifier\n'
                   'encryption = ' + algorithm)
    encryptor_dict = _create_encryptor_dict(stringio.StringIO(config_text))

    plain = Mutation('abcdefghijklmnopqrstuvwxyz')
    plain.put(cf='cf1', cq='cq1', cv='a&b', ts='12345', val='val1')
    plain.put(cv='c|d', cf='cf2', val='val2')

    wrapped = EncMutation(plain, encryptor_dict)
    assert_raises(DecryptionException, wrapped.decrypt)
    def test_with_accumulo_conn(self):
        '''
        Writes encrypted mutations and the plaintext ground truth over a
        fake accumulo connection and checks that the decrypted scan
        results match the ground truth cells.
        '''
        all_sections = ('[row]\n'
                        'key_id = table1\n'
                        'encryption = Pycrypto_AES_CFB\n'
                        '[colQualifier]\n'
                        'key_id = table1\n'
                        'encryption = Pycrypto_AES_CFB\n'
                        '[colFamily]\n'
                        'key_id = Pycrypto_AES_CFB\n'
                        'encryption = Pycrypto_AES_CFB\n'
                        '[colVisibility]\n'
                        'key_id = table1\n'
                        'encryption = Pycrypto_AES_CFB\n'
                        '[value]\n'
                        'key_id = Pycrypto_AES_CFB\n'
                        'encryption = Pycrypto_AES_CFB')

        # Plaintext mutation with two cells
        plain = Mutation('row1')
        plain.put(cf='cf1', cq='cq1', cv='cv1', ts=12345, val='val1')
        plain.put(cf='cf2', cq='cq2', cv='', ts=67890, val='val2')
        ae = AccumuloEncrypt(StringIO(all_sections), self.pki)
        enc_muts = ae.encrypt(plain)

        # Write the first two encrypted mutations along the fake connection
        conn = FakeConnection()
        conn.create_table('enc_test')
        for position in (0, 1):
            conn.write('enc_test', enc_muts[position])

        # Ground truth table holds the plaintext mutation
        conn.create_table('ground')
        conn.write('ground', plain)

        # Decrypt what comes back and compare with the ground truth
        dec_cells = [ae.decrypt(cell) for cell in conn.scan('enc_test')]
        gt_cells = [cell for cell in conn.scan('ground')]

        self.assertEqual(sorted(gt_cells), sorted(dec_cells))
Example #32
0
    def _put(self, table_name, keys_and_values, counter_deletes=True):
        """Write (key, blob) pairs to ``table_name`` through a batch
        writer, flushing early whenever the bytes queued since the last
        flush would reach half the configured thrift frame size.

        For COUNTER tables (when ``counter_deletes`` is true) every key
        is deleted and flushed first. The writer is always closed.
        """
        pending_bytes = 0
        # 2 ** 19 bytes is half a megabyte: the limit is half the frame size
        byte_limit = self.thrift_framed_transport_size_in_mb * 2 ** 19
        writer = BatchWriter(conn=self.conn,
                             table=self._ns(table_name),
                             max_memory=self._max_memory,
                             latency_ms=self._latency_ms,
                             timeout_ms=self._timeout_ms,
                             threads=self._threads)
        try:
            # Because COUNTER is implemented via a summing accumulator,
            # to do a put we need to delete all of the old values before
            # restarting the sum.
            value_type = self._value_types.get(table_name, str)
            if value_type is COUNTER and counter_deletes:
                for key, blob in keys_and_values:
                    tombstone = Mutation(key)
                    tombstone.put(cf='', cq='', is_delete=True)
                    writer.add_mutation(tombstone)
                writer.flush()

            for key, blob in keys_and_values:
                key_len = len(key)
                blob_len = len(blob)
                if key_len + blob_len + pending_bytes >= byte_limit:
                    logger.debug(
                        'len(key)=%d + len(blob)=%d + cur_bytes=%d >= '
                        'thrift_framed_transport_size_in_mb/2 = %d',
                        key_len, blob_len, pending_bytes, byte_limit)
                    logger.debug(
                        'pre-emptively sending only what has been '
                        'batched, and will send this item in next '
                        'batch.')
                    writer.flush()
                    pending_bytes = 0
                pending_bytes += key_len + blob_len

                entry = Mutation(key)
                entry.put(cf='', cq='', val=blob)
                writer.add_mutation(entry)
        finally:
            writer.close()
Example #33
0
import settings
conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

table = "regexes"
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

license_file = "LICENSE"
linenum = 0

with file(license_file) as infile:
    for line in infile:
        linenum += 1
        
        m = Mutation(str(linenum))
        m.put(cf="e", cq="", val=line.strip())
        wr.add_mutation(m)
wr.close()

regex1 = RegExFilter(priority=21, val_regex=".*stated.*", match_substring=True, name="RegExFilter1")
regex2 = RegExFilter(priority=22, val_regex='.*patent', match_substring=True, name="RegExFilter2")
regex3 = RegExFilter(priority=23, val_regex='have made', match_substring=True, name="RegExFilter3")

for e in conn.batch_scan(table, cols=[["e"]], iterators=[regex1, regex2, regex3]):
    print e

conn.close()
Example #34
0
conn = Accumulo(host=hostname,port=42424,user="******",password="******")
=======
conn = Accumulo(host="ip-172-31-52-31.ec2.internal",port=42424,user="******",password="******")
>>>>>>> 86114ad7a1f23300dc77a08666f23a9330f63515

# Write one mutation per row of `select_data` into the "dat_master" table.
# NOTE(review): `conn`, `select_data` and `geohash` are defined earlier in
# the script, outside this excerpt.
table = "dat_master"
wr = conn.create_batch_writer(table)

for num in range(select_data.shape[0]):
    latitude = select_data.get_value(num,"latitude")
    longitude = select_data.get_value(num,"longitude") 
    # The row key is the geohash of the record's coordinates
    ghash = geohash.encode(latitude,longitude)

    # A mutation is an object that represents a row in an Accumulo table
    m = Mutation(str(ghash))

    # Add the column fields
    dataset_name = str(select_data.get_value(num,"dataset_name"))
    dat_row_id = str(select_data.get_value(num,"crime_reports_row_id"))

    # Every column of this record shares the qualifier
    # "<dataset_name>.<dataset row id>"
    colqual = dataset_name + "." + dat_row_id

    m.put(cf="latitude",cq=colqual,val=str(latitude))
    m.put(cf="longitude",cq=colqual,val=str(longitude))
    m.put(cf="obs_date",cq=colqual,val=str(select_data.get_value(num,"obs_date")))
    m.put(cf="dataset_name",cq=colqual,val=dataset_name)
    m.put(cf="dataset_row_id",cq=colqual,val=dat_row_id)  
 
    # Adding the row to the table    
    wr.add_mutation(m)
    def _run_search(self, config, row, cols, correct_cells):
        '''
        Encrypts two fixed mutations with `config`, writes them over a
        fake connection, scans for the encrypted form of (row, cols) and
        checks that the decrypted results equal `correct_cells`
        '''
        # (cf, cq, ts, val) for every cell of the two test rows
        arow_cells = (('cf1', 'cq1', 1, 'val1'),
                      ('cf2', 'cq2', 2, 'val2'),
                      ('cf1', 'cq1', 3, 'val3'),
                      ('cf2', 'cq3', 4, 'val4'),
                      ('cf3', 'cq4', 5, 'val5'))
        brow_cells = (('cf1', 'cq1', 6, 'val1'),
                      ('cf2', 'cq2', 7, 'val2'))

        mut1 = Mutation('arow')
        for cf, cq, ts, val in arow_cells:
            mut1.put(cf=cf, cq=cq, cv='', ts=ts, val=val)
        mut2 = Mutation('brow')
        for cf, cq, ts, val in brow_cells:
            mut2.put(cf=cf, cq=cq, cv='', ts=ts, val=val)

        ae = AccumuloEncrypt(StringIO(config), self.pki)
        enc_muts1 = ae.encrypt(mut1)
        enc_muts2 = ae.encrypt(mut2)
        enc_row, enc_cols = ae.encrypt_search(row, cols)

        # Write all encrypted mutations along the fake connection
        conn = FakeConnection()
        conn.create_table('enc_test')
        for enc_mut in enc_muts1 + enc_muts2:
            conn.write('enc_test', enc_mut)

        # Scan exactly the encrypted row, restricted to the encrypted columns
        search_range = Range(srow=enc_row,
                             erow=enc_row,
                             sinclude=True,
                             einclude=True)
        hits = conn.scan('enc_test', scanrange=search_range, cols=enc_cols)
        dec_cells = [ae.decrypt(cell) for cell in hits]

        self.assertEqual(sorted(dec_cells), sorted(correct_cells))
Example #36
0
import settings

table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
for num in range(1, 100):
    label = '%03d'%num
    mut = Mutation('r_%s'%label)
    mut.put(cf='cf_%s'%label, cq='cq1', val='value_%s'%label)
    mut.put(cf='cf_%s'%label, cq='cq2', val='value_%s'%label)
    wr.add_mutation(mut)
wr.close()


print "Rows 001 through 003 ..."
for entry in conn.scan(table, scanrange=Range(srow='r_001', erow='r_003'), cols=[]):
    print entry

print "Rows 001 and 011 ..."
for entry in conn.batch_scan(table, scanranges=[Range(srow='r_001', erow='r_001'), Range(srow='r_011', erow='r_011')]):
    print entry

conn.close()
Example #37
0
from shapely.geometry import Polygon
from shapely.geometry import Point


# Import Accumulo
from pyaccumulo import Accumulo, Mutation, Range

select_data = pd.read_csv("/home/ubuntu/select_data.csv")

# Connecting to Accumulo
conn = Accumulo(host="172.31.3.218",port=42424,user="******",password="******")

table = "Plenario_data"
conn.create_table(table)
# Writing Mutation
wr = conn.create_batch_writer(table)

for num in range(select_data.shape[0]):
    if (num%100000==0):
        print num
    m = Mutation(str(select_data.get_value(num,"Geohash")))
    # A mutation is an object that represents a row in the Accumulo Table
    m.put(cf=str(select_data.get_value(num,"Formated_date")), val=select_data.get_value(num,"Descript"))
#     m.put(cf="cf2", val="%d"%num)
    # Adding the row to the table    

    wr.add_mutation(m)

wr.close()

Example #38
0
def remove_and_update(cq,q,a,b):
  """Move qualifier `cq` of row `q` from column family `a` to column
  family `b`: one mutation deletes the cell under `a` and adds it under
  `b`. Returns the result of writing it to the module-level table."""
  move = Mutation(q)
  move.put(cf=a, cq=cq, is_delete=True)
  move.put(cf=b, cq=cq)
  return conn.write(table, move)
Example #39
0
      n=n-1
  if entry is None:
    return None
  else:
    return entry.cq


# Module-level connection and table; the functions in this script use them
conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD)
table = settings.TABLE

# "-c <name>": create the queue row "<Q>:<name>" and add one QUEUED cell
# per row returned by the Genome/md5 scan, then exit.
# NOTE(review): Q and QUEUED are defined earlier in the script, outside
# this excerpt.
if sys.argv[1] == "-c":
  print "create"
  wr = conn.create_batch_writer(table)
  i=0
  q="%s:%s"%(Q,sys.argv[2])
  mut = Mutation(q)
  for entry in conn.batch_scan(table,cols=[["Genome","md5"]],numthreads=10):
     genome=entry.row
     # Progress marker every 1000 scanned entries
     if i%1000 == 0:
       print entry.row
     mut.put(cf=QUEUED,cq=genome)
     i=i+1
  # All queued cells are sent as a single mutation
  wr.add_mutation(mut)
  wr.close()
  exit()

if sys.argv[1] == "-r":
  print "recover"
  q="%s:%s"%(Q,sys.argv[2])
  genome=randtask(q,INPROGRESS,10)
  while genome:
Example #40
0
 def label(self, value):
     """Store `value` as this node's label in the ('other', 'label')
     cell of the node's row."""
     # labels are represented as unstructured strings, so they can be
     # written to the value field as they are
     label_cell = Mutation(self.name)
     label_cell.put(cf='other', cq='label', val=value)
     owner = self.sl
     owner.conn.write(owner.table, label_cell)
Example #41
0
                port=settings.PORT,
                user=settings.USER,
                password=settings.PASSWORD)

if conn.table_exists(table):
    conn.delete_table(table)

conn.create_table(table)
wr = conn.create_batch_writer(table)

print "Ingesting some data ..."
f = open("/bdsetup/acculog.txt", "rb")
for i in range(250):
    line = f.readline().rstrip()
    label = '%04d' % i
    mut = Mutation('r_%s' % label)
    mut.put(cq='cq1', val=line)
    #mut.put(cf='cf_%s'%label, cq='cq1', val=line)
    #mut.put(cf='cf_%s'%label, cq='cq2', val=line)
    wr.add_mutation(mut)
    i += 1
wr.close()

if conn.table_exists(table1):
    conn.delete_table(table1)

conn.create_table(table1)
wr = conn.create_batch_writer(table1)

print "Ingesting some data ..."
f = open("/bdsetup/drilllogs.txt", "rb")
Example #42
0
# NOTE(review): `summing`, `conn` and `table` are set up earlier in the
# script, outside this excerpt.
summing.attach(conn, table)

# Array-summing combiner on the "histo" column
sumarray = SummingArrayCombiner(priority=11)
sumarray.add_column("histo")
sumarray.attach(conn, table)

# Minimum combiner on the "min" column
mincom = MinCombiner(priority=12)
mincom.add_column("min")
mincom.attach(conn, table)

# Maximum combiner on the "max" column
maxcom = MaxCombiner(priority=13)
maxcom.add_column("max")
maxcom.attach(conn, table)

wr = conn.create_batch_writer(table)

# Write 1000 mutations to the same row so the attached combiners have
# several values per cell to work with
for num in range(0, 1000):
    m = Mutation("row")
    m.put(cf="sum", cq="cq", val="%d"%num)
    m.put(cf="count", cq="cq", val="%d"%1)
    m.put(cf="min", cq="cq", val="%d"%num)
    m.put(cf="max", cq="cq", val="%d"%num)
    m.put(cf="histo", cq="cq", val=",".join( [str(x) for x in [1,2,3,4,5,6,7,8,9]]))

    wr.add_mutation(m)
wr.close()

# Print what the table returns after the writes
for e in conn.scan(table):
    print e
    
conn.close()