Beispiel #1
0
    def test_should_store_muliple_similar_complex_strings_with_different_timestamps_saved_out_of_order_should_be_loaded_in_order(
            self):
        """Store four log lines out of timestamp order and search them by index.

        Three of the texts contain the word 'Notice', the fourth does not.
        The free text search must return exactly those three hits with a
        strictly increasing first element, and the unique host name must
        give exactly one hit.
        """
        source_id = 'indexed_test_3'
        data_name = 'evil_text'
        time_format = '%Y-%m-%dT%H:%M:%S'

        # (timestamp, text) pairs, deliberately NOT sorted by timestamp.
        samples = [
            ('1982-03-01T06:06:06',
             u'Hans-Eklunds-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not stârting scheduled '
             u'Time Machine backup - time machine destination not resolvable.'),
            ('1982-03-01T06:06:08',
             u'Hans-Smiths-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled '
             u'Time Machine backup - time machine destination not resolvable.'),
            ('1982-03-01T06:06:07',
             u'Hans-Johnssons-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled '
             u'Time Machine backup - time machine destination not resolvable.'),
            ('1982-03-01T06:06:09',
             u'time machine destination not recoverable.'),
        ]

        # Build every DTO first, then persist them in the listed order.
        dtos = [
            TimestampedDataDTO(source_id,
                               datetime.strptime(stamp, time_format),
                               data_name, text)
            for stamp, text in samples
        ]
        for dto in dtos:
            self.dao.insert_timestamped_data(dto)
            self.dao.insert_indexable_text_as_blob_data_and_insert_index(dto)

        # Only the first three texts contain 'Notice'.
        hits = self.dao.get_blobs_by_free_text_index(source_id, data_name,
                                                     'Notice')
        self.assertEqual(len(hits), 3)

        # Each hit must sort strictly before the next one.
        for position in (0, 1):
            self.assertTrue(hits[position][0] < hits[position + 1][0])

        # The host name below occurs in a single stored text only.
        hits = self.dao.get_blobs_by_free_text_index(source_id, data_name,
                                                     'Hans-Smiths-MacBook')
        self.assertEqual(len(hits), 1)
Beispiel #2
0
    def test_should_store_data_for_several_data_names_and_load_by_index_with_date_range(
            self):
        """Index three texts under three data names and search within a time window.

        The texts share the word 'sea' but are stored one minute apart.
        A ten second search window around the second timestamp must return
        only the second entry, and a word that was never stored must return
        nothing.
        """
        source_id = 'unittests.indexed_test_7'
        time_format = '%Y-%m-%dT%H:%M:%S'

        # (data name, timestamp, text): the texts differ slightly but share
        # common words so a single search term can match all of them.
        fixtures = [
            ('evil3_text',
             datetime.strptime('1982-03-01T06:06:05', time_format),
             u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'),
            ('bad3_text',
             datetime.strptime('1982-03-01T06:07:05', time_format),
             u'Darn to you o örth ánd sea. For the mother sends the beast with wrath'),
            ('nasty3_text',
             datetime.strptime('1982-03-01T06:08:05', time_format),
             u'Hey to you o örth ánd sea. For the bushes sends the beast with wrath'),
        ]
        data_names = [name for name, _, _ in fixtures]

        for name, stamp, text in fixtures:
            self.dao.insert_indexable_text_as_blob_data_and_insert_index(
                TimestampedDataDTO(source_id, stamp, name, text))

        # Window that only covers the second fixture's timestamp.
        window_start = datetime.strptime('1982-03-01T06:07:00', time_format)
        window_end = datetime.strptime('1982-03-01T06:07:10', time_format)

        matches = self.dao.get_blobs_multi_data_by_free_text_index(
            source_id, data_names, 'sea', window_start, window_end)

        # Only the middle entry lies inside the window.
        _, middle_stamp, middle_text = fixtures[1]
        self.assertEqual(len(matches), 1)
        self.assertEqual(matches[0][0], middle_stamp)
        self.assertEqual(matches[0][1], middle_text)

        # Same window, but a word that does not occur in any stored text.
        matches = self.dao.get_blobs_multi_data_by_free_text_index(
            source_id, data_names, 'volvo', window_start, window_end)

        # Nothing may be found for the unknown word.
        self.assertEqual(len(matches), 0)
Beispiel #3
0
    def test_should_build_utc_epoch_milliseconds_using_timezone_aware_datetime(self):
        """Smoke test for ``timestamp_as_unix_time_millis`` with an aware datetime.

        Builds a DTO from a timezone-aware US/Eastern "now" and prints the
        value returned by ``timestamp_as_unix_time_millis()``. No assertion
        is made; the test only fails if the call raises.
        """
        # Timezone-aware current time in US/Eastern.
        local_now = datetime.now(tz=pytz.timezone('US/Eastern'))

        dto = TimestampedDataDTO('test', local_now, 'd', '0')
        unix_time_millis = dto.timestamp_as_unix_time_millis()

        # The parenthesized single-argument form prints the same text with
        # the Python 2 print statement and the Python 3 print function.
        print(unix_time_millis)
Beispiel #4
0
    def test_should_print_row_key_for_hourly_using_naive_datetime(self):
        """Smoke test for ``get_row_key_for_hourly`` with a naive datetime.

        Builds a DTO from a naive local "now" and prints the hourly row key.
        No assertion is made; the test only fails if the call raises.
        """
        # datetime.now() returns a naive datetime in the local timezone
        # (i.e. it carries no timezone info).
        naive_local_now = datetime.now()

        dto = TimestampedDataDTO('test', naive_local_now, 'd', '0')
        row_key = dto.get_row_key_for_hourly()

        # The parenthesized single-argument form prints the same text with
        # the Python 2 print statement and the Python 3 print function.
        print(row_key)
Beispiel #5
0
    def test_should_print_row_key_for_hourly_using_naive_datetime(self):
        """Smoke test for ``get_row_key_for_hourly`` with a naive datetime.

        Builds a DTO from a naive local "now" and prints the hourly row key.
        No assertion is made; the test only fails if the call raises.
        """
        # datetime.now() returns a naive datetime in the local timezone
        # (i.e. it carries no timezone info).
        naive_local_now = datetime.now()

        dto = TimestampedDataDTO('test', naive_local_now, 'd', '0')
        row_key = dto.get_row_key_for_hourly()

        # The parenthesized single-argument form prints the same text with
        # the Python 2 print statement and the Python 3 print function.
        print(row_key)
Beispiel #6
0
    def test_should_build_utc_epoch_milliseconds_using_timezone_aware_datetime(
            self):
        """Smoke test for ``timestamp_as_unix_time_millis`` with an aware datetime.

        Builds a DTO from a timezone-aware US/Eastern "now" and prints the
        value returned by ``timestamp_as_unix_time_millis()``. No assertion
        is made; the test only fails if the call raises.
        """
        # Timezone-aware current time in US/Eastern.
        local_now = datetime.now(tz=pytz.timezone('US/Eastern'))

        dto = TimestampedDataDTO('test', local_now, 'd', '0')
        unix_time_millis = dto.timestamp_as_unix_time_millis()

        # The parenthesized single-argument form prints the same text with
        # the Python 2 print statement and the Python 3 print function.
        print(unix_time_millis)
Beispiel #7
0
    def test_should_build_utc_based_row_key_for_hourly_using_timezone_aware_datetime(self):
        """Check that an aware local time and naive UTC give the same hourly row key.

        Both datetimes are derived from ONE sampled instant. Sampling
        ``utcnow()`` and ``now(tz)`` separately would let the two values fall
        on opposite sides of an hour boundary and fail the test spuriously.
        """
        # Timezone-aware current time in US/Eastern.
        local_now = datetime.now(tz=pytz.timezone('US/Eastern'))
        # The same instant expressed as a naive UTC datetime, i.e. the value
        # datetime.utcnow() would have returned at that exact moment.
        utc_now = local_now.astimezone(pytz.utc).replace(tzinfo=None)

        dto = TimestampedDataDTO('test', local_now, 'd', '0')
        row_key_local_now = dto.get_row_key_for_hourly()

        dto = TimestampedDataDTO('test', utc_now, 'd', '0')
        row_key_utc_now = dto.get_row_key_for_hourly()

        # Both representations of the instant must map to the same row key.
        self.assertEqual(row_key_utc_now, row_key_local_now)
Beispiel #8
0
    def test_should_store_a_unicode_string_and_corresponding_indexes_and_load_by_date_range_and_index(
            self):
        """Store one unicode text and read it back by date range and by index.

        The same DTO is written to the time series and to the indexed blob
        store. Both the date range query and the free text search must
        return exactly that one (timestamp, text) entry.
        """
        source_id = 'indexed_test_1'
        data_name = 'evil_text'
        text = u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'
        stamp = datetime.strptime('1982-03-01T06:06:06', '%Y-%m-%dT%H:%M:%S')

        stored = TimestampedDataDTO(source_id, stamp, data_name, text)
        self.dao.insert_timestamped_data(stored)
        self.dao.insert_indexable_text_as_blob_data_and_insert_index(stored)

        def assert_single_stored_row(rows):
            # Exactly one row, carrying the stored timestamp and text.
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0][0], stamp)
            self.assertEqual(rows[0][1], text)

        # A window of one minute on each side of the timestamp must find it.
        one_minute = timedelta(minutes=1)
        assert_single_stored_row(
            self.dao.get_timetamped_data_range(source_id, data_name,
                                               stamp - one_minute,
                                               stamp + one_minute))

        # A free text search for a word in the text must find it as well.
        assert_single_stored_row(
            self.dao.get_blobs_by_free_text_index(source_id, data_name, 'sea'))
Beispiel #9
0
    def test_should_store_data_for_several_data_names_and_load_by_multi_data_index_search(
            self):
        """Index three texts under three data names and find all with one search.

        The texts share the word 'sea' and are stored one second apart. A
        multi data name search for 'sea' must return all three entries in
        the order they are listed below.
        """
        source_id = 'indexed_test_6'
        time_format = '%Y-%m-%dT%H:%M:%S'

        # (data name, timestamp, text): the texts differ slightly but share
        # common words so a single search term can match all of them.
        fixtures = [
            ('evil3_text',
             datetime.strptime('1982-03-01T06:06:06', time_format),
             u'Woe to you o örth ánd sea. For the devil sends the beast with wrath'),
            ('bad3_text',
             datetime.strptime('1982-03-01T06:06:07', time_format),
             u'Darn to you o örth ánd sea. For the mother sends the beast with wrath'),
            ('nasty3_text',
             datetime.strptime('1982-03-01T06:06:08', time_format),
             u'Hey to you o örth ánd sea. For the bushes sends the beast with wrath'),
        ]

        for name, stamp, text in fixtures:
            self.dao.insert_indexable_text_as_blob_data_and_insert_index(
                TimestampedDataDTO(source_id, stamp, name, text))

        # One free text search across all three data names.
        found = self.dao.get_blobs_multi_data_by_free_text_index(
            source_id, [name for name, _, _ in fixtures], 'sea')

        self.assertEqual(len(found), 3)
        # Every hit must carry the timestamp and text of its fixture.
        for position, (_, stamp, text) in enumerate(fixtures):
            self.assertEqual(found[position][0], stamp)
            self.assertEqual(found[position][1], text)
Beispiel #10
0
    def test_should_insert_latest_data_with_different_timestamps_and_only_newest_should_be_loaded(
            self):
        """Insert "latest" values out of order and load only the newest per name.

        "Latest" means the entry with the most recent timestamp, not the
        entry that was inserted last.
        """
        source_id = 'latest_test_1C'

        # Start from a clean slate and verify that nothing is left.
        self.dao.remove_latest_data(source_id)
        self.assertEqual(self.dao.load_latest_data(source_id), {})

        # (timestamp, data name, value), inserted in exactly this order.
        # For each name the :07 entry is the newest and is neither the first
        # nor the last one inserted.
        readings = (
            ('2012-05-20T06:06:05', 'temp', '5'),
            ('2012-05-20T06:06:07', 'temp', '7'),  # <= this must be loaded
            ('2012-05-20T06:06:06', 'temp', '6'),
            ('2012-05-20T06:06:05', 'size', '50'),
            ('2012-05-20T06:06:07', 'size', '70'),  # <= this must be loaded
            ('2012-05-20T06:06:06', 'size', '60'),
        )
        for stamp, name, value in readings:
            self.dao.insert_latest_data(
                TimestampedDataDTO(source_id, self.ts(stamp), name, value))

        latest = self.dao.load_latest_data(source_id)
        for name, newest_value in (('temp', '7'), ('size', '70')):
            self.assertEqual(latest[name], newest_value)
Beispiel #11
0
    def test_should_store_arabic_and_store_manual_index_and_load_by_free_text_search(
            self):
        """Store an Arabic text with hand-made indexes and find it by search.

        The text goes into the time series and the blob store without
        automatic indexing. Two index entries are then inserted manually,
        and a search for one of them must return the stored text.
        """
        source_id = 'indexed_test_5'
        data_name = 'evil_text2'
        text = u'مساعدة في تصليح كود'
        stamp = datetime.strptime('1988-03-01T06:06:11', '%Y-%m-%dT%H:%M:%S')

        stored = TimestampedDataDTO(source_id, stamp, data_name, text)

        # Time series copy first, then the blob whose row key the manual
        # index entries point at (no auto-indexing here).
        self.dao.insert_timestamped_data(stored)
        blob_row_key = self.dao.insert_blob_data(stored)

        # Unicode index terms are encoded as UTF-8 to conform with the keys
        # of the index entries in Cassandra.
        index_entries = [
            BlobIndexDTO(source_id, data_name, term.encode('utf-8'), stamp,
                         blob_row_key)
            for term in (u'árabic', u'works')
        ]
        self.dao.batch_insert_indexes(index_entries)

        def assert_single_stored_row(rows):
            # Exactly one row, carrying the stored timestamp and text.
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0][0], stamp)
            self.assertEqual(rows[0][1], text)

        # A window of one minute on each side of the timestamp must find it.
        one_minute = timedelta(minutes=1)
        assert_single_stored_row(
            self.dao.get_timetamped_data_range(source_id, data_name,
                                               stamp - one_minute,
                                               stamp + one_minute))

        # Search for one of the manually inserted index terms.
        found = self.dao.get_blobs_by_free_text_index(source_id, data_name,
                                                      u'works')
        self.assertIsNotNone(found)
        assert_single_stored_row(found)
Beispiel #12
0
    def test_should_return_build_actual_indexes_from_string_dto(self):
        """Build index DTOs from a plain text DTO and print each one.

        No assertion is made; the test only fails if building or formatting
        the indexes raises.
        """
        # Given a text DTO and the row key its indexes should refer to
        text_dto = TimestampedDataDTO('the_kids', datetime.utcnow(),
                                      'log_text',
                                      'hello indexed words of yore')
        row_key = 'magic_key_123'

        # When the indexer builds the index entries
        index_dtos = self.string_indexer.build_indexes_from_timstamped_dto(
            text_dto, row_key)

        # Then every entry can be iterated and rendered
        for entry in index_dtos:
            print(u'Index: %s' % entry)
Beispiel #13
0
    def test_should_build_utc_based_row_key_for_hourly_using_timezone_aware_datetime(
            self):
        """Check that an aware local time and naive UTC give the same hourly row key.

        Both datetimes are derived from ONE sampled instant. Sampling
        ``utcnow()`` and ``now(tz)`` separately would let the two values fall
        on opposite sides of an hour boundary and fail the test spuriously.
        """
        # Timezone-aware current time in US/Eastern.
        local_now = datetime.now(tz=pytz.timezone('US/Eastern'))
        # The same instant expressed as a naive UTC datetime, i.e. the value
        # datetime.utcnow() would have returned at that exact moment.
        utc_now = local_now.astimezone(pytz.utc).replace(tzinfo=None)

        dto = TimestampedDataDTO('test', local_now, 'd', '0')
        row_key_local_now = dto.get_row_key_for_hourly()

        dto = TimestampedDataDTO('test', utc_now, 'd', '0')
        row_key_utc_now = dto.get_row_key_for_hourly()

        # Both representations of the instant must map to the same row key.
        self.assertEqual(row_key_utc_now, row_key_local_now)
Beispiel #14
0
    def test_should_store_and_load_a_complex_string_and_corresponding_indexes_and_load_by_index(
            self):
        """Index one syslog-style line and find it with several search strings.

        A single word, a multi word phrase in different case, and a token
        with punctuation must each return the stored entry. A word that was
        never stored must return no hits.
        """
        source_id = 'indexed_test_2'
        data_name = 'evil_text'
        log_line = u'Tue Mar  5 14:41:33 Hans-Eklunds-MacBook-Pro com.apple.backupd-auto[3780] <Notice>: Not starting scheduled Time Machine backup - time machine destination not resolvable.'
        stamp = datetime.strptime('1982-03-01T06:06:06', '%Y-%m-%dT%H:%M:%S')

        stored = TimestampedDataDTO(source_id, stamp, data_name, log_line)
        self.dao.insert_timestamped_data(stored)
        self.dao.insert_indexable_text_as_blob_data_and_insert_index(stored)

        # Each of these searches must return exactly the stored entry.
        matching_searches = (
            'Notice',
            'hans eklunds MacBook pro',
            'backupd-auto[3780]',
        )
        for phrase in matching_searches:
            found = self.dao.get_blobs_by_free_text_index(
                source_id, data_name, phrase)
            self.assertEqual(len(found), 1)
            self.assertEqual(found[0][0], stamp)
            self.assertEqual(found[0][1], log_line)

        # A search without any hit yields an empty result.
        found = self.dao.get_blobs_by_free_text_index(source_id, data_name,
                                                      'w000000000t')
        self.assertEqual(len(found), 0)
Beispiel #15
0
    def __insert_range_of_metrics(self,
                                  source_id,
                                  value_name,
                                  start_datetime,
                                  end_datetime,
                                  batch_insert=False,
                                  set_latest=False):
        """Insert a series of test metrics, 20 minutes apart, and return them.

        Walks from ``start_datetime`` to ``end_datetime`` (inclusive) in
        steps of 20 minutes. The metric value is a running counter starting
        at 0, stored as a string.

        DTOs are only created (and inserted) while
        ``self.insert_the_test_range_into_live_db`` is truthy; otherwise the
        returned list stays empty.

        :param source_id: source id given to every DTO.
        :param value_name: data name given to every DTO.
        :param start_datetime: timestamp of the first metric.
        :param end_datetime: last timestamp that may still be used.
        :param batch_insert: if true, all DTOs are written with one
            ``batch_insert_timestamped_data`` call after the loop (called
            even when the list is empty); otherwise each DTO is written
            individually with ``insert_timestamped_data``.
        :param set_latest: passed through to the DAO insert calls.
        :return: list of the DTOs that were created, in timestamp order.
        """
        step = timedelta(minutes=20)
        curr_datetime = start_datetime
        value = 0

        # Build list of DTOs
        dtos = list()
        while curr_datetime <= end_datetime:
            if self.insert_the_test_range_into_live_db:
                dto = TimestampedDataDTO(source_id, curr_datetime, value_name,
                                         str(value))
                if not batch_insert:
                    self.dao.insert_timestamped_data(dto,
                                                     set_latest=set_latest)

                dtos.append(dto)
            # Time and counter advance even when nothing is inserted.
            curr_datetime = curr_datetime + step
            value += 1

        # And batch insert
        if batch_insert:
            self.dao.batch_insert_timestamped_data(dtos, set_latest=set_latest)
        return dtos