def test_should_store_muliple_similar_complex_strings_with_different_timestamps_saved_out_of_order_should_be_loaded_in_order(self):
    """Blobs indexed out of timestamp order must be returned sorted by timestamp."""
    source_id = 'indexed_test_3'
    data_name = 'evil_text'

    # Timestamps are deliberately inserted out of order (:06, :08, :07, :09).
    # The first three contain the word 'Notice'; the fourth does not.
    fixtures = [
        ('1982-03-01T06:06:06',
         u'Hans-Eklunds-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not st\u00e2rting scheduled '
         u'Time Machine backup - time machine destination not resolvable.'),
        ('1982-03-01T06:06:08',
         u'Hans-Smiths-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled '
         u'Time Machine backup - time machine destination not resolvable.'),
        ('1982-03-01T06:06:07',
         u'Hans-Johnssons-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled '
         u'Time Machine backup - time machine destination not resolvable.'),
        ('1982-03-01T06:06:09',
         u'time machine destination not recoverable.'),
    ]
    for ts_string, text in fixtures:
        ts = datetime.strptime(ts_string, '%Y-%m-%dT%H:%M:%S')
        dto = TimestampedDataDTO(source_id, ts, data_name, text)
        self.dao.insert_timestamped_data(dto)
        self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto)

    # Three should be found; the last fixture must not match this search.
    results = self.dao.get_blobs_by_free_text_index(
        source_id, data_name, 'Notice')
    self.assertEqual(len(results), 3)

    # Results must come back in timestamp order despite out-of-order insertion.
    self.assertTrue(results[0][0] < results[1][0])
    self.assertTrue(results[1][0] < results[2][0])

    # A host name unique to one entry must yield exactly one hit.
    results = self.dao.get_blobs_by_free_text_index(
        source_id, data_name, 'Hans-Smiths-MacBook')
    self.assertEqual(len(results), 1)
def test_should_store_data_for_several_data_names_and_load_by_index_with_date_range(self):
    """A multi-data-name free-text search restricted by a date range must only
    return hits whose timestamps fall inside the range."""
    source_id = 'unittests.indexed_test_7'
    data_name1 = 'evil3_text'
    data_name2 = 'bad3_text'
    data_name3 = 'nasty3_text'
    # Note, they differ slightly, but have common words so one search can hit all of them
    data_value_unicode1 = u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'
    data_value_unicode2 = u'Darn to you o örth ánd sea. For the mother sends the beast with wrath'
    data_value_unicode3 = u'Hey to you o örth ánd sea. For the bushes sends the beast with wrath'
    # Timestamps are one minute apart so a narrow range can isolate the middle one.
    beastly_timestamp1 = datetime.strptime('1982-03-01T06:06:05', '%Y-%m-%dT%H:%M:%S')
    beastly_timestamp2 = datetime.strptime('1982-03-01T06:07:05', '%Y-%m-%dT%H:%M:%S')
    beastly_timestamp3 = datetime.strptime('1982-03-01T06:08:05', '%Y-%m-%dT%H:%M:%S')
    dto1 = TimestampedDataDTO(source_id, beastly_timestamp1, data_name1, data_value_unicode1)
    self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto1)
    dto2 = TimestampedDataDTO(source_id, beastly_timestamp2, data_name2, data_value_unicode2)
    self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto2)
    dto3 = TimestampedDataDTO(source_id, beastly_timestamp3, data_name3, data_value_unicode3)
    self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto3)
    # Now make a free text search; 'sea' occurs in all three texts,
    # but the range only covers the second timestamp.
    search_string = 'sea'
    start_time = datetime.strptime('1982-03-01T06:07:00', '%Y-%m-%dT%H:%M:%S')
    end_time = datetime.strptime('1982-03-01T06:07:10', '%Y-%m-%dT%H:%M:%S')
    result = self.dao.get_blobs_multi_data_by_free_text_index(
        source_id, [data_name1, data_name2, data_name3],
        search_string, start_time, end_time)
    # Should only find middle instance for the given range
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][0], beastly_timestamp2)
    self.assertEqual(result[0][1], data_value_unicode2)
    # Now make the same free text search with a term that should NOT hit
    search_string = 'volvo'
    start_time = datetime.strptime('1982-03-01T06:07:00', '%Y-%m-%dT%H:%M:%S')
    end_time = datetime.strptime('1982-03-01T06:07:10', '%Y-%m-%dT%H:%M:%S')
    result = self.dao.get_blobs_multi_data_by_free_text_index(
        source_id, [data_name1, data_name2, data_name3],
        search_string, start_time, end_time)
    # A term absent from every text must yield no hits at all
    self.assertEqual(len(result), 0)
def test_should_build_utc_epoch_milliseconds_using_timezone_aware_datetime(self):
    # Smoke test: converts a timezone-aware datetime to unix-time milliseconds
    # and prints the result (no assertion).
    # NOTE(review): a method with this exact name is defined again further down
    # in this class; the later definition shadows this one, so this copy never
    # runs under unittest. One of the two should be removed.
    # Both should generate the same row key
    utc_now = datetime.utcnow()  # NOTE(review): unused in this test
    local_now = datetime.now(tz=pytz.timezone('US/Eastern'))
    dto = TimestampedDataDTO('test', local_now, 'd', '0')
    unix_time_millis = dto.timestamp_as_unix_time_millis()
    print unix_time_millis
def test_should_print_row_key_for_hourly_using_naive_datetime(self): # returns naive datetime in the local timezone (ie. no timezone info) naive_local_now = datetime.now() utc_now = datetime.utcnow() dto = TimestampedDataDTO('test', naive_local_now, 'd', '0') row_key = dto.get_row_key_for_hourly() print row_key
def test_should_build_utc_epoch_milliseconds_using_timezone_aware_datetime( self): # Both should generate the same row key utc_now = datetime.utcnow() local_now = datetime.now(tz=pytz.timezone('US/Eastern')) dto = TimestampedDataDTO('test', local_now, 'd', '0') unix_time_millis = dto.timestamp_as_unix_time_millis() print unix_time_millis
def test_should_build_utc_based_row_key_for_hourly_using_timezone_aware_datetime(self):
    # A tz-aware local "now" and the equivalent naive UTC "now" must map to the
    # same hourly row key, i.e. row keys are normalized to UTC.
    # NOTE(review): a method with this exact name is defined again further down
    # in this class; the later definition shadows this one, so this copy never
    # runs under unittest. One of the two should be removed.
    # Both should generate the same row key
    utc_now = datetime.utcnow()
    local_now = datetime.now(tz=pytz.timezone('US/Eastern'))
    dto = TimestampedDataDTO('test', local_now, 'd', '0')
    row_key_local_now = dto.get_row_key_for_hourly()
    dto = TimestampedDataDTO('test', utc_now, 'd', '0')
    row_key_utc_now = dto.get_row_key_for_hourly()
    self.assertEqual(row_key_utc_now, row_key_local_now)
def test_should_store_a_unicode_string_and_corresponding_indexes_and_load_by_date_range_and_index(self):
    """A unicode string must round-trip both via date-range load and via the free-text index."""
    source_id = 'indexed_test_1'
    data_name = 'evil_text'
    devilish_text = u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'
    beastly_timestamp = datetime.strptime('1982-03-01T06:06:06', '%Y-%m-%dT%H:%M:%S')

    dto = TimestampedDataDTO(source_id, beastly_timestamp, data_name, devilish_text)
    self.dao.insert_timestamped_data(dto)
    self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto)

    # A one-minute window around the timestamp must return exactly the stored value.
    result = self.dao.get_timetamped_data_range(
        source_id, data_name,
        beastly_timestamp - timedelta(minutes=1),
        beastly_timestamp + timedelta(minutes=1))
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][0], beastly_timestamp)
    self.assertEqual(result[0][1], devilish_text)

    # A free-text search on a word from the text must find the same blob.
    result = self.dao.get_blobs_by_free_text_index(source_id, data_name, 'sea')
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][0], beastly_timestamp)
    self.assertEqual(result[0][1], devilish_text)
def test_should_store_data_for_several_data_names_and_load_by_multi_data_index_search(self):
    """A multi-data-name free-text search must hit every stored data name, in timestamp order."""
    source_id = 'indexed_test_6'
    # The texts differ slightly but share common words, so one search can hit all three.
    fixtures = [
        ('evil3_text', '1982-03-01T06:06:06',
         u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'),
        ('bad3_text', '1982-03-01T06:06:07',
         u'Darn to you o örth ánd sea. For the mother sends the beast with wrath'),
        ('nasty3_text', '1982-03-01T06:06:08',
         u'Hey to you o örth ánd sea. For the bushes sends the beast with wrath'),
    ]
    expected = []
    data_names = []
    for name, ts_string, text in fixtures:
        ts = datetime.strptime(ts_string, '%Y-%m-%dT%H:%M:%S')
        self.dao.insert_indexable_text_as_blob_data_and_insert_index(
            TimestampedDataDTO(source_id, ts, name, text))
        expected.append((ts, text))
        data_names.append(name)

    # 'sea' occurs in every text, so all three blobs must be returned, in order.
    result = self.dao.get_blobs_multi_data_by_free_text_index(
        source_id, data_names, 'sea')
    self.assertEqual(len(result), 3)
    for position, (ts, text) in enumerate(expected):
        self.assertEqual(result[position][0], ts)
        self.assertEqual(result[position][1], text)
def test_should_insert_latest_data_with_different_timestamps_and_only_newest_should_be_loaded(self):
    """'Latest' means highest timestamp, not most recently inserted."""
    source_id = 'latest_test_1C'

    # Ensure old test data is gone, and verify the slate really is clean.
    self.dao.remove_latest_data(source_id)
    self.assertEqual(self.dao.load_latest_data(source_id), {})

    # Insert each name out of chronological order; the 06:06:07 value must win.
    fixtures = [
        ('temp', '2012-05-20T06:06:05', '5'),
        ('temp', '2012-05-20T06:06:07', '7'),   # <= this must be loaded
        ('temp', '2012-05-20T06:06:06', '6'),
        ('size', '2012-05-20T06:06:05', '50'),
        ('size', '2012-05-20T06:06:07', '70'),  # <= this must be loaded
        ('size', '2012-05-20T06:06:06', '60'),
    ]
    for name, ts_string, value in fixtures:
        self.dao.insert_latest_data(
            TimestampedDataDTO(source_id, self.ts(ts_string), name, value))

    result = self.dao.load_latest_data(source_id)
    self.assertEqual(result['temp'], '7')
    self.assertEqual(result['size'], '70')
def test_should_store_arabic_and_store_manual_index_and_load_by_free_text_search(self):
    """Arabic text stored as a blob with hand-built index entries must be searchable."""
    source_id = 'indexed_test_5'
    data_name = 'evil_text2'
    arabic_text = u'مساعدة في تصليح كود'
    beastly_timestamp = datetime.strptime('1988-03-01T06:06:11', '%Y-%m-%dT%H:%M:%S')
    dto = TimestampedDataDTO(source_id, beastly_timestamp, data_name, arabic_text)

    # Store in the time-series shard.
    self.dao.insert_timestamped_data(dto)

    # No auto-indexing for this one: store the blob and build the index by hand.
    # Any unicode search words must be UTF-8 encoded to conform with the keys
    # of the index entries in Cassandra.
    blob_row_key = self.dao.insert_blob_data(dto)
    manual_indexes = [
        BlobIndexDTO(source_id, data_name, word.encode('utf-8'),
                     beastly_timestamp, blob_row_key)
        for word in (u'árabic', u'works')
    ]
    self.dao.batch_insert_indexes(manual_indexes)

    # The raw value must be readable back within the surrounding date range.
    result = self.dao.get_timetamped_data_range(
        source_id, data_name,
        beastly_timestamp - timedelta(minutes=1),
        beastly_timestamp + timedelta(minutes=1))
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][0], beastly_timestamp)
    self.assertEqual(result[0][1], arabic_text)

    # A free-text search on one of the manually indexed words must find the blob.
    result = self.dao.get_blobs_by_free_text_index(source_id, data_name, u'works')
    self.assertIsNotNone(result)
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0][0], beastly_timestamp)
    self.assertEqual(result[0][1], arabic_text)
def test_should_return_build_actual_indexes_from_string_dto(self):
    """Smoke test: building indexes from a string DTO yields printable index DTOs."""
    # Given
    dto = TimestampedDataDTO('the_kids', datetime.utcnow(), 'log_text',
                             'hello indexed words of yore')
    # When
    index_dtos = self.string_indexer.build_indexes_from_timstamped_dto(
        dto, 'magic_key_123')
    # Then — no assertions in the original; just exercise string conversion.
    for index_dto in index_dtos:
        print(u'Index: %s' % index_dto)
def test_should_build_utc_based_row_key_for_hourly_using_timezone_aware_datetime(self):
    """Hourly row keys are UTC-based: aware-local and naive-UTC 'now' must agree."""
    utc_now = datetime.utcnow()
    local_now = datetime.now(tz=pytz.timezone('US/Eastern'))
    # The same instant expressed two ways must produce the same hourly row key.
    key_from_local = TimestampedDataDTO('test', local_now, 'd', '0').get_row_key_for_hourly()
    key_from_utc = TimestampedDataDTO('test', utc_now, 'd', '0').get_row_key_for_hourly()
    self.assertEqual(key_from_utc, key_from_local)
def test_should_store_and_load_a_complex_string_and_corresponding_indexes_and_load_by_index(self):
    """A syslog-style line must be findable via several different free-text terms."""
    source_id = 'indexed_test_2'
    data_name = 'evil_text'
    data_value = u'Tue Mar 5 14:41:33 Hans-Eklunds-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled Time Machine backup - time machine destination not resolvable.'
    beastly_timestamp = datetime.strptime('1982-03-01T06:06:06', '%Y-%m-%dT%H:%M:%S')

    dto = TimestampedDataDTO(source_id, beastly_timestamp, data_name, data_value)
    self.dao.insert_timestamped_data(dto)
    self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto)

    # Each of these queries must hit the single stored blob exactly once.
    for search_string in ('Notice',
                          'hans eklunds MacBook pro',
                          'backupd-auto[3780]'):
        result = self.dao.get_blobs_by_free_text_index(
            source_id, data_name, search_string)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0][0], beastly_timestamp)
        self.assertEqual(result[0][1], data_value)

    # A term occurring nowhere must yield an empty result, not an error.
    result = self.dao.get_blobs_by_free_text_index(
        source_id, data_name, 'w000000000t')
    self.assertEqual(len(result), 0)
def __insert_range_of_metrics(self, source_id, value_name, start_datetime, end_datetime, batch_insert=False, set_latest=False):
    """Insert test metrics between start_datetime and end_datetime (inclusive),
    one value every 20 minutes, with a monotonically increasing payload.

    When batch_insert is False each DTO is written immediately; when True all
    DTOs are written in a single bulk call at the end. Returns the list of
    DTOs that were (or would have been) inserted.

    Fixes: removes the unused ``values_inserted`` counter.
    """
    dtos = list()
    metric_value = 0
    cursor = start_datetime
    while cursor <= end_datetime:
        # Skip all writes when the live-db flag is off (dry-run mode).
        if self.insert_the_test_range_into_live_db:
            dto = TimestampedDataDTO(source_id, cursor, value_name, str(metric_value))
            if not batch_insert:
                # Immediate mode: one row at a time.
                self.dao.insert_timestamped_data(dto, set_latest=set_latest)
            dtos.append(dto)
        cursor = cursor + timedelta(minutes=20)
        metric_value += 1

    # Batch mode defers every write to a single bulk insert.
    if batch_insert:
        self.dao.batch_insert_timestamped_data(dtos, set_latest=set_latest)
    return dtos