def get(self, md5):
    """
    Fetch everything stored in the file table for a single md5 row.

    Arguments:
    md5 - the row id to look up; it is passed through util.check_md5 first

    Returns None when the scan yields no entries for the row. Otherwise
    returns the tuple (projects, paths, size, content, tags) where:
    projects - list of column qualifiers found under 'file|project'
    paths    - list of column qualifiers found under 'file|path'
    size     - int parsed from the 'file|size' / 'size' cell, or None
    content  - value of the 'file|content' / 'content' cell, or None
    tags     - bool built from the 'twosix' / 'tags' cell, default False

    Raises ValueError when an entry matches none of the known
    column family / qualifier combinations.
    """
    self._check_connection()
    util.check_md5(md5)

    # In ASCII '{' < '|' < '}', so these bounds bracket the 'file|...'
    # column families (presumably restricting the scan to them).
    r = pyaccumulo.Range(srow=md5, erow=md5, scf='file{', ecf='file}')

    # Scan once and reuse the result; the row used to be scanned a second
    # time just to drive the loop below.
    entries = list(self.conn.scan(self.file_table, scanrange=r))
    if not entries:
        return None

    projects = []
    paths = []
    size = None
    # Default to None so a row without a content cell no longer makes the
    # return statement fail with UnboundLocalError.
    content = None
    tags = False

    for entry in entries:
        if entry.cf == 'file|path':
            paths.append(entry.cq)
        elif entry.cf == 'file|project':
            projects.append(entry.cq)
        elif entry.cf == 'file|size' and entry.cq == 'size':
            size = int(entry.val)
        elif entry.cf == 'twosix' and entry.cq == 'tags':
            # NOTE(review): 'twosix' sorts after 'file}', so this branch
            # looks unreachable with the range above - confirm intent.
            # NOTE(review): bool() of any non-empty value is True, even
            # 'False' - verify how tags are written.
            tags = bool(entry.val)
        elif entry.cf == 'file|content' and entry.cq == 'content':
            content = entry.val
        else:
            raise ValueError('cf {}, cq {} not recognized'.format(
                entry.cf, entry.cq))

    return projects, paths, size, content, tags
def range(self, start, end):
    """
    Scan the file table from row start to row end and return the
    distinct row ids (md5s) that were seen, as a sorted list.
    """
    self._check_connection()
    bounds = pyaccumulo.Range(srow=start, erow=end)
    scanner = self.conn.scan(self.file_table, scanrange=bounds,
                             cols=[['file|size']])
    # A set collapses repeated rows before ordering.
    ordered = list({entry.row for entry in scanner})
    ordered.sort()
    return ordered
def prefix(self, pre):
    """
    List, in sorted order, the distinct md5 rows returned by scanning
    from pre up to pre followed by '}'.
    """
    self._check_connection()
    # '}' appended to the prefix serves as the end row of the scan.
    upper = pre + '}'
    span = pyaccumulo.Range(srow=pre, erow=upper)
    matches = set()
    matches.update(
        entry.row
        for entry in self.conn.scan(self.file_table, scanrange=span,
                                    cols=[['file|size']]))
    return sorted(matches)
def range_uuid_name(self, start, end):
    """
    Generate (md5, uuid, filename) tuples for rows between start and end.

    Only the first 'file|path' entry of each run of identical rows is
    used. The uuid is the first 36 characters of that entry's column
    qualifier and the filename is its basename.
    """
    self._check_connection()
    bounds = pyaccumulo.Range(srow=start, erow=end)
    last_row = None
    for entry in self.conn.scan(self.file_table, scanrange=bounds,
                                cols=[['file|path']]):
        # Skip further paths that belong to the row just reported.
        if entry.row == last_row:
            continue
        last_row = entry.row
        stored_path = entry.cq
        yield last_row, stored_path[:36], os.path.basename(stored_path)
def read(self, md5):
    """
    Scan the file table for the row md5 and sort its entries into
    paths, projects and size.

    Returns None when the scan yields no entries.

    NOTE(review): the visible body stops right after the loop without a
    return statement, so the collected values are not handed back here;
    the definition looks truncated - confirm against the full source.
    """
    util.check_md5(md5)
    # In ASCII '{' < '|' < '}', so these bounds bracket the 'file|...'
    # column families.
    r = pyaccumulo.Range(srow=md5, erow=md5, scf='file{', ecf='file}')
    # Materialize the scan so emptiness can be checked before looping.
    entries = [x for x in self.conn.scan(self.file_table, scanrange=r)]
    if len(entries) == 0:
        return None
    projects = []
    paths = []
    size = None
    for entry in entries:
        if entry.cf == 'file|path':
            # The path is carried in the column qualifier.
            paths.append(entry.cq)
        elif entry.cf == 'file|project':
            # The project name is carried in the column qualifier.
            projects.append(entry.cq)
        elif entry.cf == 'file|size' and entry.cq == 'size':
            size = int(entry.val)
def run_search(conn=None, table='demo', config_name='expressive',
               PKI_object=None, row_range='Analytics'):
    """
    Retrieves and decrypts values from the specified table, printing each
    decrypted cell, or an error message for entries that fail to decrypt.

    Arguments:
    conn - the Accumulo connection to use; when None, a connection to
           localhost:42424 is opened
    table - the name of the table to scan
    config_name - key into `schema` selecting the encryption configuration
    PKI_object - matches the interface on encryption PKI OBJECT,
                 default is DummyEncryptionPKI
    row_range - the keyword to search for; it is encrypted and used as
                both the start and end row of the scan
    """
    if conn is None:
        conn = pyaccumulo.Accumulo(host='localhost',
                                   port=42424,
                                   user='******',
                                   password='******')
    if PKI_object is None:
        PKI_object = DummyEncryptionPKI(conn=conn)

    config_filept = StringIO(schema[config_name])
    encrypter = AccumuloEncrypt(config_filept, PKI_object)
    enc_row, _ = encrypter.encrypt_search(row_range, None)

    # Named scan_range so the builtin range() is not shadowed.
    scan_range = pyaccumulo.Range(srow=enc_row, sinclude=True,
                                  erow=enc_row, einclude=True)

    # total counts every scanned entry, including those that fail to decrypt.
    total = 0
    for entry in conn.scan(table, scanrange=scan_range):
        total += 1
        try:
            cell = encrypter.decrypt(entry)
        except DecryptionException as ve:
            # Single-argument print(...) gives the same output on
            # Python 2 and Python 3.
            print('Error: Entry failed to decrypt.')
            print('Error message: %s' % (ve.msg,))
            print('')
        else:
            print("Entry: (%s, %s, %s, %s)" % (cell.row, cell.cf,
                                               cell.cq, cell.val))
    print('Finished, decrypted %d total entries.' % total)