def exportJsonDB(json_data, frameNum):
    """ Exports the JSON data to the Accumulo database """
    conn = Accumulo(host="localhost", port=50096, user="******", password="******")

    # Rehydrate the JSON payload so individual fields can be split out.
    record = json.loads(json_data)

    # The video name doubles as the table name; dots are not valid in
    # Accumulo table names, and pyaccumulo wants a byte string.
    table_name = record['videoMetadata']['videoName']
    table_name = table_name.replace('.', '_').encode('ascii', 'ignore')
    if not conn.table_exists(table_name):
        conn.create_table(table_name)

    # One row per frame; the frame number is the row id.
    row = Mutation("row_%d" % frameNum)

    # The raw frame image lives in its own column, apart from the metadata.
    row.put(cf="cf2", cq="cq2", val=record['imageBase64'])
    if 'LabeledImage' in record:
        # The labeled frame image likewise gets its own column.
        row.put(cf="cf3", cq="cq3", val=record['LabeledImage'])
        record.pop('LabeledImage', None)

    # Strip the bulky base64 payloads so cf1 holds metadata only.
    record.pop('imageBase64', None)
    row.put(cf="cf1", cq="cq1", val=json.dumps(record))

    conn.write(table_name, row)
    conn.close()
def new(cls, elems, lbound, rbound, coin=BaseCoin(),
        conn_info=ConnInfo('localhost', 42424, 'root', 'secret'),
        table='__ADS_metadata___', elemclass=IntElem):
    """ Create a new skiplist that stores all of its data inside an
        Accumulo instance.

        Arguments:
        cls - the class implementing this class method
        elems - the elements to create the skiplist over
        lbound, rbound - the left and right boundary elements of the list
        coin - the source of randomness to use (see pace.ads.skiplist.coin)
        conn_info - how to connect to the Accumulo instance being used
        table - the name of the table to store the ADS in
        elemclass - the class to use to store the elements in the skiplist

        NOTE(review): the coin/conn_info defaults are objects evaluated once
        at definition time and shared across calls -- confirm BaseCoin and
        ConnInfo are stateless before relying on that.
    """
    sl = cls(None, lbound, rbound, coin)

    if conn_info is not None:
        # For connecting to a live Accumulo instance.
        # Fixed: dropped the dead `host, port, user, password = conn_info`
        # unpacking -- its results were never used, and it raised when
        # conn_info was not exactly a 4-sequence.
        conn = Accumulo(host=conn_info.host,
                        port=conn_info.port,
                        user=conn_info.user,
                        password=conn_info.password)
    else:
        # For testing/debug
        conn = FakeConnection()

    sl.conn = conn
    sl.table = table
    sl.elemclass = elemclass

    if not conn.table_exists(table):
        conn.create_table(table)

    # Boundary sentinels: build the right node first so the left node can
    # link to it; the left sentinel becomes the list root.
    right = cls.nodeclass.newnode(sl, None, None, rbound, True)
    left = cls.nodeclass.newnode(sl, None, right, lbound, True)
    sl.root = left

    for elem in elems:
        sl.insert(elem)

    return sl
""" from pyaccumulo import Accumulo, Mutation, Range import settings import sys sys.path sys.path.append('/bdsetup') table = "well_logs" table1 = "drill_logs" conn = Accumulo(host=settings.HOST, port=settings.PORT, user=settings.USER, password=settings.PASSWORD) if conn.table_exists(table): conn.delete_table(table) conn.create_table(table) wr = conn.create_batch_writer(table) print "Ingesting some data ..." f = open("/bdsetup/acculog.txt", "rb") for i in range(250): line = f.readline().rstrip() label = '%04d' % i mut = Mutation('r_%s' % label) mut.put(cq='cq1', val=line) #mut.put(cf='cf_%s'%label, cq='cq1', val=line) #mut.put(cf='cf_%s'%label, cq='cq2', val=line) wr.add_mutation(mut)
class EzRPCertStore(object):
    """
    Wrapper class to underlying database store which hold server certs for reverse proxy

    Layout: one row per server name; column pfx:enc holds the PKCS#12 blob
    (cert + key, encrypted with a per-server derived password).
    """

    def __init__(self, host='localhost', port=42424, user='******', password='******',
                 table='ezfrontend', privateKey=None, logger=None):
        self.__table = table
        self.__signer = None          # PKCS1 v1.5 signer; set when privateKey is given
        self.__dbConnection = None
        self.__cf = "pfx"
        self.__cq = "enc"

        if logger is not None:
            self.__logger = logger
        else:
            self.__logger = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__logger.addHandler(logging.NullHandler())

        if privateKey is not None:
            self.__updateSigner(privateKey)

        self.__connectToAccumulo(host, port, user, password)

    def __connectToAccumulo(self, host, port, user, password):
        """Open the Accumulo connection; wrap failures in EzRPCertStoreException."""
        try:
            self.__dbConnection = Accumulo(host, port, user, password)
            self.__logger.debug('Successfully connected to CertStore')
        except Exception as ex:
            self.__logger.exception('Error in connecting to CertStore: %s' % str(ex))
            raise EzRPCertStoreException('Error in connecting to CertStore: %s' % str(ex))

    def __updateSigner(self, privateKey):
        """Load the RSA private key used to derive per-server PKCS#12 passwords."""
        with open(privateKey) as file:
            self.__signer = PKCS1_v1_5.new(RSA.importKey(file.read()))
        self.__logger.info('Updated signer for CertStore')

    def __ensureTable(self):
        """Create the backing table on first use; raise if it still doesn't exist."""
        if not self.__dbConnection.table_exists(self.__table):
            self.__logger.info('DB table %s doesn\'t exist in the Store. Creating ...' % self.__table)
            self.__dbConnection.create_table(self.__table)
            if not self.__dbConnection.table_exists(self.__table):
                self.__logger.error('Unable to ensure DB table exists in the Store.')
                raise EzRPCertStoreException('CertStore: Unable to ensure DB table exists in the Store.')

    def _generatePassword(self, serverName):
        """Derive the PKCS#12 passphrase for serverName.

        Without a signer this is just base64 of salt+name (obfuscation only);
        with a signer it is the base64 RSA signature over SHA256(salt+name).
        """
        password = '******'  # salt
        if self.__signer is None:
            password = base64.b64encode(password + serverName)
        else:
            digest = SHA256.new(password + serverName)
            signature = self.__signer.sign(digest)
            password = base64.b64encode(signature)
        return password

    def _generatePkcs12(self, serverName, certContents, keyContents, password=None):
        """Bundle a PEM cert/key pair into an (optionally encrypted) PKCS#12 blob."""
        key = OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, keyContents)
        cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, certContents)
        pfx = OpenSSL.crypto.PKCS12()
        pfx.set_certificate(cert)
        pfx.set_privatekey(key)
        return pfx.export(passphrase=password)

    def _retrieveCertAndKey(self, pfx, serverName, password=None):
        """Unpack a PKCS#12 blob back into (certContents, keyContents) PEM strings."""
        p12 = OpenSSL.crypto.load_pkcs12(pfx, password)
        keyContents = OpenSSL.crypto.dump_privatekey(OpenSSL.crypto.FILETYPE_PEM,
                                                     p12.get_privatekey())
        certContents = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                                       p12.get_certificate())
        return certContents, keyContents

    def put(self, serverName, certContents, keyContents):
        """Store (or overwrite) the cert/key pair for serverName."""
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        value = self._generatePkcs12(serverName, certContents, keyContents,
                                     self._generatePassword(serverName))
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, val=value)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('added cert/key contents for %s to store' % serverName)

    def get(self, serverName):
        """Return (certContents, keyContents) for serverName, or (None, None)."""
        self.__ensureTable()
        # NOTE(review): this scans the whole column and filters by row in
        # Python; a single-row Range (as in exists()) would be cheaper.
        for entry in self.__dbConnection.scan(self.__table, cols=[[self.__cf, self.__cq]]):
            if entry.row == serverName:
                self.__logger.debug('retrieved cert/key for %s from store' % serverName)
                return self._retrieveCertAndKey(entry.val, serverName,
                                                self._generatePassword(serverName))
        return None, None

    def remove(self, serverName):
        """Delete the stored cert/key pair for serverName."""
        self.__ensureTable()
        writer = self.__dbConnection.create_batch_writer(self.__table)
        mutation = Mutation(serverName)
        mutation.put(cf=self.__cf, cq=self.__cq, is_delete=True)
        writer.add_mutation(mutation)
        writer.close()
        self.__logger.debug('removed cert/key for %s from store' % serverName)

    def exists(self, serverName):
        """Return True iff a cert/key entry exists for serverName."""
        self.__ensureTable()
        # Use a single-row range to narrow our scan.  (Renamed from `range`,
        # which shadowed the builtin.)
        scanRange = Range(srow=serverName, scf=self.__cf, scq=self.__cq,
                          erow=serverName, ecf=self.__cf, ecq=self.__cq)
        for entry in self.__dbConnection.scan(self.__table, scanrange=scanRange):
            if entry.row == serverName:
                # Fixed: self.__logger was being *called* here (TypeError at
                # runtime); log through the debug level instead.
                self.__logger.debug('cert/key for %s exists in store' % serverName)
                return True
        self.__logger.debug('cert/key for %s DOES NOT exist in store' % serverName)
        return False
# limitations under the License.
from pyaccumulo import Accumulo
from pyaccumulo.objects import Range
from pyaccumulo.iterators import *

import settings
import sys

conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)

table = sys.argv[1]
if not conn.table_exists(table):
    print("Table '%s' does not exist." % table)
    sys.exit(1)

# Keep only the terms long enough to have been indexed (length > 3),
# lower-cased to match the index.
search_terms = []
for term in sys.argv[2:]:
    if len(term) > 3:
        search_terms.append(term.lower())

if len(search_terms) < 2:
    print("More than one term of length > 3 is required for this example")
    sys.exit(1)

# Intersect the terms over the shard rows (rows in ["s", "t")); the column
# qualifier of each match carries the document uuid.
uuids = [
    e.cq
    for e in conn.batch_scan(
        table,
        scanranges=[Range(srow="s", erow="t")],
        iterators=[IntersectingIterator(priority=21, terms=search_terms)])
]
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pyaccumulo import Accumulo, Mutation, Range

import settings

table = "pythontest"

conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)

# Start from an empty table every run.
if conn.table_exists(table):
    conn.delete_table(table)
conn.create_table(table)

wr = conn.create_batch_writer(table)

print("Ingesting some data ...")
# 99 rows (r_001 .. r_099), each with one mutation carrying two columns in
# its own column family.
for num in range(1, 100):
    label = '%03d' % num
    mut = Mutation('r_%s' % label)
    for cq in ('cq1', 'cq2'):
        mut.put(cf='cf_%s' % label, cq=cq, val='value_%s' % label)
    wr.add_mutation(mut)
wr.close()
# NOTE(review): the next five statements are the tail of a helper defined
# above this chunk (they reference `writer`, `m`, and `shard` from that
# enclosing scope -- presumably the flush logic of a write_mutations()
# helper); the indentation below is reconstructed -- confirm against the
# full file.  Flush the mutation every 1000 updates, then flush any
# remainder.
if len(m.updates) > 1000:
    writer.add_mutation(m)
    m = Mutation(shard)
if len(m.updates) > 0:
    writer.add_mutation(m)

# Script entry: index every file under the given input directories into the
# named Accumulo table.
try:
    table = sys.argv[1]
    input_dirs = sys.argv[2:]
except:
    # NOTE(review): bare except -- swallows everything, including
    # KeyboardInterrupt/SystemExit; an explicit IndexError check (or
    # argparse) would be safer.
    usage()

conn = Accumulo(host=settings.HOST, port=settings.PORT,
                user=settings.USER, password=settings.PASSWORD)

if not conn.table_exists(table):
    print "Creating table: %s"%table
    conn.create_table(table)

wr = conn.create_batch_writer(table)

# Walk each input directory and index every file found, sharded by the
# file's uuid.
for indir in input_dirs:
    for root, subFolders, files in os.walk(indir):
        for filename in files:
            filePath = os.path.join(root, filename)
            print "indexing file %s"%filePath
            uuid = get_uuid(filePath)
            with open( filePath, 'r' ) as f:
                write_mutations(wr, get_shard(uuid), uuid, filePath, get_tokens(f))

wr.close()
class EzRPStaticStore(object):
    '''
    Class to save and retrieve static content from Accumulo.
    cf = "static"    For all rows
    cq = "hash"      Stores the hash_value of Static File
    cq = "nofchunks" Stores the number of Chunks needed to store Static File
    cq = "chunk_000" .. "chunk_nnn"  Stores the Chunks of Static File
    '''

    def __init__(self, host="localhost", port=42424, user='******', password='******',
                 chunk_size=int(5*1048576), logger=None):
        self.__host = host
        self.__port = port
        self.__user = user
        self.__password = password
        self.__table = 'ezfrontend'
        self.__cf = 'static'
        self.__connection = None

        if logger is not None:
            self.__log = logger
        else:
            self.__log = logging.getLogger(self.__module__ + '.' + self.__class__.__name__)
            self.__log.addHandler(logging.NullHandler())

        self.__chunk_size = int(chunk_size)
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def _connect(self, host, port, user, password):
        '''Connect to Accumulo; wrap any failure in a plain Exception.'''
        try:
            self.__connection = Accumulo(host, port, user, password)
            self.__log.debug('Connected to StaticFile Store')
        except Exception as e:
            self.__log.exception('Error while connecting to StaticFile Store: %s' % str(e))
            raise Exception('Error while connecting to StaticFile Store: %s' % str(e))

    def _ensureTableExists(self):
        '''
        Make sure that the table exists before any other operation.
        Reconnect to Accumulo if the Connection is reset.
        '''
        if not self.__connection.table_exists(self.__table):
            self.__log.info('table "{table}" does not exist in StaticFile Store. '
                            'Creating the table'.format(table=self.__table))
            self.__connection.create_table(self.__table)
            if not self.__connection.table_exists(self.__table):
                # Fixed: the message previously ran its argument through the
                # builtin format() nested inside str.format(), which raised a
                # TypeError instead of logging; the quote was unbalanced too.
                self.__log.error('Unable to ensure StaticFile Store table "{table}" exists'.format(table=self.__table))
                raise Exception('StaticFile Store: Unable to ensure table "{table}" exists'.format(table=self.__table))

    def _ensureNoDuplicates(self, usrFacingUrlPrefix):
        '''Ensure a single copy of file for a given usrFacingUrlPrefix.'''
        if self._getHash(usrFacingUrlPrefix) is not None:
            self.deleteFile(usrFacingUrlPrefix)

    def _putNofChunks(self, usrFacingUrlPrefix, length):
        '''Store the number of chunks used for this URL's static content.'''
        chunks = int(math.ceil(length / float(self.__chunk_size)))
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="nofchunks", val=str(chunks))
        writer.add_mutation(m)
        writer.close()

    def _getNofChunks(self, usrFacingUrlPrefix):
        '''Return the stored chunk count for this URL (0 when absent).'''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="nofchunks",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="nofchunks")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return int(entry.val)
        return 0

    def _getChunks(self, data):
        '''
        Yield successive chunk_size-sized slices of data.
        Chunk size should stay below maxFrameSize in Accumulo proxy.properties.
        '''
        # Fixed: the range previously ran to len(data) + 1, which emitted a
        # spurious empty chunk whenever len(data) was an exact multiple of
        # the chunk size (and a single empty chunk for empty data), leaving
        # orphan chunk cells that _putNofChunks never counted.
        data_length = len(data)
        for i in range(0, data_length, self.__chunk_size):
            yield data[i:i + self.__chunk_size]

    def _putHash(self, usrFacingUrlPrefix, hash_str):
        '''Puts the Hash for usrFacingUrlPrefix.'''
        writer = self.__connection.create_batch_writer(self.__table)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", val=hash_str)
        writer.add_mutation(m)
        writer.close()

    def _getHash(self, usrFacingUrlPrefix):
        '''Return the stored hash for this URL, or None when absent.'''
        scan_range = Range(srow=usrFacingUrlPrefix, scf=self.__cf, scq="hash",
                           erow=usrFacingUrlPrefix, ecf=self.__cf, ecq="hash")
        for entry in self.__connection.scan(self.__table, scanrange=scan_range):
            return str(entry.val)
        return None

    def reConnection(self):
        '''Re-establish the Accumulo connection with the stored credentials.'''
        self._connect(self.__host, self.__port, self.__user, self.__password)

    def putFile(self, usrFacingUrlPrefix, hash_str, data):
        '''Store data for the URL in chunks, replacing any previous copy.'''
        self._ensureTableExists()
        self._ensureNoDuplicates(usrFacingUrlPrefix)
        self._putHash(usrFacingUrlPrefix, hash_str)
        data_length = len(data)
        self._putNofChunks(usrFacingUrlPrefix, data_length)

        writer = self.__connection.create_batch_writer(self.__table)
        for i, chunk in enumerate(self._getChunks(data)):
            m = Mutation(usrFacingUrlPrefix)
            m.put(cf=self.__cf, cq="chunk_{number:010d}".format(number=i), val=chunk)
            writer.add_mutation(m)
        self.__log.debug('added static file for "{url}" with hash "{hash}" of length "{length}"'.format(url=usrFacingUrlPrefix, hash=hash_str, length=data_length))
        writer.close()

    def getFile(self, usrFacingUrlPrefix):
        '''Assemble and return all the chunks for this row, or None when absent.'''
        self._ensureTableExists()
        data = array.array('c')  # byte buffer -- NOTE(review): 'c' is a Python 2 typecode
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        chunks_read = 0
        # NOTE(review): chunks are fetched with one scan per chunk rather than
        # a single chunk_000..chunk_nnn range scan; the ranged version made the
        # Accumulo proxy die with java.lang.OutOfMemoryError for content over
        # ~96MB, so keep the per-chunk scans.
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, cq]]):
                if entry.row == usrFacingUrlPrefix and entry.cq.startswith("chunk_"):
                    chunks_read += 1
                    data.extend(entry.val)
        self.__log.debug('retrieved static file for {url}'.format(url=usrFacingUrlPrefix))
        if chunks_read != chunks:
            self.__log.error("did not read all the chunks from StaticFile Store")
        return data.tostring() if data.buffer_info()[1] > 0 else None

    def deleteFile(self, usrFacingUrlPrefix):
        '''Delete the hash, chunk count, and every chunk cell for this URL.'''
        self._ensureTableExists()
        writer = self.__connection.create_batch_writer(self.__table)
        chunks = self._getNofChunks(usrFacingUrlPrefix)
        m = Mutation(usrFacingUrlPrefix)
        m.put(cf=self.__cf, cq="hash", is_delete=True)
        m.put(cf=self.__cf, cq="nofchunks", is_delete=True)
        for i in range(chunks):
            cq = 'chunk_{number:010d}'.format(number=i)
            m.put(cf=self.__cf, cq=cq, is_delete=True)
        writer.add_mutation(m)
        self.__log.debug('removed static file for {url}'.format(url=usrFacingUrlPrefix))
        writer.close()

    def getAttributes(self):
        '''
        Yield (urlprefix, hash) for every entry in the table; yield a single
        (None, None) only when the store is empty.
        '''
        self._ensureTableExists()
        found = False
        for entry in self.__connection.scan(self.__table, None, cols=[[self.__cf, "hash"]]):
            found = True
            yield (entry.row, str(entry.val))
        if not found:
            # Fixed: the original for/else always yielded a trailing
            # (None, None) after the real entries (the loop contains no break,
            # so the else clause always ran); the sentinel now appears only
            # when the table has no entries at all.
            yield (None, None)