def transaction(client, processed_crash=processed_crash):
    # Persist a processed crash into HBase and maintain the bookkeeping
    # tables around it.  Nested transaction callback: `self`, `json`,
    # `utc_now`, and the crash-id helpers come from the enclosing scope;
    # `processed_crash` is bound via the default argument.
    # Work on a copy so the caller's dict is not mutated.
    processed_crash = processed_crash.copy()
    # Dates must be strings before JSON serialization below.
    self._stringify_dates_in_dict(processed_crash)
    crash_id = processed_crash['uuid']
    row_id = crash_id_to_row_id(crash_id)
    processing_state = self._get_report_processing_state(
        client, crash_id)
    # Prefer the timestamp recorded at submission; fall back to the
    # crash's own processing date, then to a sentinel.
    submitted_timestamp = processing_state.get(
        'timestamps:submitted',
        processed_crash.get('date_processed', 'unknown'))
    if processing_state.get('flags:processed', '?') == 'N':
        # First time this crash is processed: drop it from the
        # unprocessed index and decrement the queue-size counter.
        index_row_key = crash_id_to_timestamped_row_id(
            crash_id, submitted_timestamp)
        client.atomicIncrement('metrics', 'crash_report_queue',
                               'counters:current_unprocessed_size', -1)
        client.deleteAllRow('crash_reports_index_unprocessed_flag',
                            index_row_key)
    processed_timestamp = processed_crash['completeddatetime']
    # Normalize the signature: missing key and empty string get
    # distinct sentinel values so they can be told apart later.
    if 'signature' in processed_crash:
        if len(processed_crash['signature']) > 0:
            signature = processed_crash['signature']
        else:
            signature = '##empty##'
    else:
        signature = '##null##'
    mutations = []
    mutations.append(
        Mutation(column="timestamps:processed",
                 value=processed_timestamp))
    mutations.append(
        Mutation(column="processed_data:signature",
                 value=signature))
    processed_crash_as_json_string = json.dumps(processed_crash)
    mutations.append(
        Mutation(column="processed_data:json",
                 value=processed_crash_as_json_string))
    mutations.append(Mutation(column="flags:processed", value="Y"))
    # Approximate payload size, logged below for performance tracking
    # (+1 accounts for the one-byte "Y" processed flag).
    mutation_size = (len(processed_timestamp) + len(signature) +
                     len(processed_crash_as_json_string) + 1)
    start_timestamp = utc_now()
    try:
        client.mutateRow('crash_reports', row_id, mutations)
    finally:
        # Log timing even when the write raises.
        end_timestamp = utc_now()
        self.config.logger.debug(
            'mutation size for row_id %s: %s, execution time: %s',
            row_id, mutation_size, end_timestamp - start_timestamp)
    # Secondary index: signature -> crash id.
    sig_ooid_idx_row_key = signature + crash_id
    client.mutateRow('crash_reports_index_signature_ooid',
                     sig_ooid_idx_row_key,
                     [Mutation(column="ids:ooid", value=crash_id)])
def writeTicks(self, ticks):
    ''' write tick records to HBase, creating the tick table on first use '''
    table = self.tableName(HBaseDAM.TICK)
    # lazily create the table with a single column family
    if table not in self.__hbase.getTableNames():
        descriptors = [ColumnDescriptor(name=HBaseDAM.TICK, maxVersions=5)]
        self.__hbase.createTable(table, descriptors)
    # one row per tick, keyed by its time; one column per tick field
    for tick in ticks:
        cells = []
        for field in TICK_FIELDS:
            cells.append(Mutation(column="%s:%s" % (HBaseDAM.TICK, field),
                                  value=getattr(tick, field)))
        self.__hbase.updateRow(self.tableName(HBaseDAM.TICK),
                               tick.time,
                               cells)
def writeQuotes(self, quotes):
    ''' write quote records to HBase, creating the quote table on first use '''
    table = self.tableName(HBaseDAM.QUOTE)
    # lazily create the table with a single column family
    if table not in self.__hbase.getTableNames():
        descriptors = [ColumnDescriptor(name=HBaseDAM.QUOTE, maxVersions=5)]
        self.__hbase.createTable(table, descriptors)
    # one row per quote, keyed by its time; one column per quote field
    for quote in quotes:
        cells = []
        for field in QUOTE_FIELDS:
            cells.append(Mutation(column="%s:%s" % (HBaseDAM.QUOTE, field),
                                  value=getattr(quote, field)))
        self.__hbase.updateRow(self.tableName(HBaseDAM.QUOTE),
                               quote.time,
                               cells)
def save_to_hbase(userid, company, data):
    """Write *data* as a JSON blob into the 'info:current' column of the
    configured HBase table.

    The row key is str(userid) concatenated with company.  Returns a
    success dict {"msg": "", "code": 0} on completion.  Relies on
    module-level `socket`, `client`, and `Testingconfig` objects.
    """
    table = Testingconfig.HBASE_TABLE
    socket.open()
    try:
        row = str(userid) + company
        mutations = [Mutation(column="info:current", value=json.dumps(data))]
        client.mutateRow(table, row, mutations)
    finally:
        # Fix: close the transport even when mutateRow raises; the
        # original leaked the open socket on any write error.
        socket.close()
    suc_msg = {
        "msg": "",
        "code": 0,
    }
    return suc_msg
def commit(self):
    ''' flush the write cache to HBase, completing the write operation '''
    if not self.tableName:
        raise UfException(Errors.TABLENAME_NOT_SET, "Table name not set")
    # ensure every referenced column family exists before writing
    column_names = set(col for (_row, col) in self.__writeCache.iterkeys())
    self.resetCols(column_names)
    # push each cached cell as a single-mutation row update
    for (row, col), value in self.__writeCache.iteritems():
        cell = Mutation(column="%s:" % col, value=str(value))
        self.__hbase.updateRow(self.tableName, row, [cell])
def transaction(client):
    # Persist a raw crash report into HBase and update its indices and
    # metrics.  Nested transaction callback: `crash_id`, `raw_crash`,
    # `dumps`, `json`, `self`, and the crash-id helpers come from the
    # enclosing scope.
    row_id = crash_id_to_row_id(crash_id)
    submitted_timestamp = raw_crash['submitted_timestamp']
    # NOTE(review): default False compares equal to 0 below, so crashes
    # without the key take the legacy path — presumably intentional.
    legacy_processing = raw_crash.get('legacy_processing', False)
    columns = [("flags:processed", "N"),
               ("meta_data:json", json.dumps(raw_crash)),
               ("timestamps:submitted", submitted_timestamp),
               ("ids:ooid", crash_id)]
    # attach each memory dump under raw_data:, normalizing the
    # default-dump key names to plain 'dump'
    for key, dump in dumps.iteritems():
        if key in (None, '', 'upload_file_minidump'):
            key = 'dump'
        columns.append(('raw_data:%s' % key, dump))
    # skip columns whose value is None
    mutations = [
        Mutation(column=c, value=v)
        for c, v in columns
        if v is not None
    ]
    indices = [
        'crash_reports_index_submitted_time',
        'crash_reports_index_unprocessed_flag'
    ]
    if legacy_processing == 0:
        # throttle level 0: also flag/index for the legacy pipeline
        mutations.append(
            Mutation(column="flags:legacy_processing", value='Y'))
        indices.append('crash_reports_index_legacy_unprocessed_flag')
        indices.append('crash_reports_index_legacy_submitted_time')
    process_type = raw_crash.get('ProcessType', 'default')
    is_hang = 'HangID' in raw_crash
    if is_hang:
        hang_id = raw_crash['HangID']
        mutations.append(Mutation(column="ids:hang", value=hang_id))
    # main report row, then its secondary indices
    client.mutateRow('crash_reports', row_id, mutations)
    self._put_crash_report_indices(client, crash_id,
                                   submitted_timestamp, indices)
    if is_hang:
        # Put the hang's indices: both sides of a hang pair share the
        # hang_id, and the column name carries the process type.
        ooid_column_name = "ids:ooid:" + process_type
        client.mutateRow(
            'crash_reports_index_hang_id_submitted_time',
            crash_id_to_timestamped_row_id(hang_id, submitted_timestamp),
            [Mutation(column=ooid_column_name, value=crash_id)])
        client.mutateRow(
            'crash_reports_index_hang_id', hang_id,
            [Mutation(column=ooid_column_name, value=crash_id)])
    # update the metrics -- one counter row per time granularity,
    # sliced from the ISO timestamp prefix
    time_levels = [
        submitted_timestamp[:16],  # minute yyyy-mm-ddTHH:MM
        submitted_timestamp[:13],  # hour   yyyy-mm-ddTHH
        submitted_timestamp[:10],  # day    yyyy-mm-dd
        submitted_timestamp[:7],   # month  yyyy-mm
        submitted_timestamp[:4]    # year   yyyy
    ]
    counter_increments = ['counters:submitted_crash_reports']
    counter_increments.append(
        "counters:submitted_crash_reports_legacy_throttle_%d"
        % legacy_processing)
    if process_type != 'default':
        if is_hang:
            counter_increments.append(
                "counters:submitted_crash_report_hang_pairs")
        else:
            # out-of-process crash of a specific type
            counter_increments.append(
                "counters:submitted_oop_%s_crash_reports" % process_type)
    client.atomicIncrement('metrics', 'crash_report_queue',
                           'counters:current_unprocessed_size', 1)
    if legacy_processing == 0:
        client.atomicIncrement(
            'metrics', 'crash_report_queue',
            'counters:current_legacy_unprocessed_size', 1)
    # bump every counter at every time granularity
    for rowkey in time_levels:
        for column in counter_increments:
            client.atomicIncrement('metrics', rowkey, column, 1)
def _put_crash_report_indices(self, client, crash_id, timestamp, indices):
    """Insert a timestamped index row pointing back at crash_id into
    each of the given index tables."""
    indexed_row_id = crash_id_to_timestamped_row_id(crash_id, timestamp)
    for index_table in indices:
        client.mutateRow(index_table,
                         indexed_row_id,
                         [Mutation(column="ids:ooid", value=crash_id)])
assert not h.getTableNames() #create table tName = 'testTable' h.createTable(tName, [ColumnDescriptor(name='col1', maxVersions=5), ColumnDescriptor(name='col2', maxVersions=5)]) print h.getTableNames() assert h.getTableNames() print "column families in %s" %(tName) print h.getColumnDescriptors(tName) #updateRow h.updateRows(tName, "bar", [Mutation(column="col1:bar", value='12345'), Mutation(column="col2:", value="67890")]) h.updateRows(tName, "foo", [Mutation(column="col1:foo", value='12345')]) print h.getRow(tName, 'bar') print h.getRow(tName, 'foo') #scan table rows = h.scanTable(tName, columns=["col1", "col2"]) print rows assert 2 == len(rows) rows = h.scanTable(tName, columns=["col1", "col2"], startRow="foo") print rows assert 1 == len(rows) rows = h.scanTable(tName, columns=["col1", "col2"], endRow="foo") print rows
# Dump the column families of table `t`, then exercise UTF-8 handling
# of the Thrift mutateRow API (variant that takes an attributes dict).
cols = client.getColumnDescriptors(t)
print "column families in %s" % (t)
for col_name in cols.keys():
    col = cols[col_name]
    print " column: %s, maxVer: %d" % (col.name, col.maxVersions)
# attributes are unused in this demo, but the API requires the arg
dummy_attributes = {}
#
# Test UTF-8 handling
#
invalid = "foo-\xfc\xa1\xa1\xa1\xa1\xa1"
valid = "foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB"
# non-utf8 is fine for data (cell values are arbitrary bytes)
mutations = [Mutation(column="entry:foo", value=invalid)]
print str(mutations)
client.mutateRow(t, "foo", mutations, dummy_attributes)
# try empty strings for row key, column qualifier, and value
mutations = [Mutation(column="entry:", value="")]
client.mutateRow(t, "", mutations, dummy_attributes)
# this row name is valid utf8
mutations = [Mutation(column="entry:foo", value=valid)]
client.mutateRow(t, valid, mutations, dummy_attributes)
# non-utf8 is not allowed in row names -- expected to raise;
# the matching except clause is beyond this view
try:
    mutations = [Mutation(column="entry:foo", value=invalid)]
    client.mutateRow(t, invalid, mutations, dummy_attributes)
# for e in range(100, 0, -1): # format row keys as "00000" to "00100" row = "%0.5d" % (e) mutations = [Mutation({"column": "unused:", "value": "DELETE_ME"})] client.mutateRow(t, row, mutations) printRow(client.getRow(t, row)) client.deleteAllRow(t, row) mutations = [Mutation({"column": "entry:num", "value": "0"}), Mutation({"column": "entry:foo", "value": "FOO"})] client.mutateRow(t, row, mutations) printRow(client.getRow(t, row)) mutations = [] m = Mutation() m.column = "entry:foo" m.isDelete = 1 mutations.append(m) m = Mutation() m.column = "entry:num" m.value = "-1" mutations.append(m) client.mutateRow(t, row, mutations) printRow(client.getRow(t, row)) mutations = [ Mutation({"column": "entry:num", "value": str(e)}), Mutation({"column": "entry:sqr", "value": str(e * e)}), ] client.mutateRow(t, row, mutations)
# Handler for the createTable call above this view: an existing table
# is only worth a warning here.
except AlreadyExists, ae:
    print "WARN: " + ae.message
# dump the column families of table `t`
cols = client.getColumnDescriptors(t)
print "column families in %s" % (t)
for col_name in cols.keys():
    col = cols[col_name]
    print " column: %s, maxVer: %d" % (col.name, col.maxVersions)
#
# Test UTF-8 handling
#
invalid = "foo-\xfc\xa1\xa1\xa1\xa1\xa1"
valid = "foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB"
# non-utf8 is fine for data (cell values are arbitrary bytes)
mutations = [Mutation({"column": "entry:foo", "value": invalid})]
client.mutateRow(t, "foo", mutations)
# try empty strings for row key, column qualifier, and value
mutations = [Mutation({"column": "entry:", "value": ""})]
client.mutateRow(t, "", mutations)
# this row name is valid utf8
mutations = [Mutation({"column": "entry:foo", "value": valid})]
client.mutateRow(t, valid, mutations)
# non-utf8 is not allowed in row names -- expected to raise
try:
    mutations = [Mutation({"column": "entry:foo", "value": invalid})]
    client.mutateRow(t, invalid, mutations)
# handler body continues beyond this view
except ttypes.IOError, e: