def test_incremental_dump(self):
    base = 1500000000
    listens = generate_data(1, self.testuser_name, base - 4, 5, base + 1)  # 5 listens with inserted_ts base+1 to base+5
    self._insert_with_created(listens)
    listens = generate_data(1, self.testuser_name, base + 1, 5, base + 6)  # 5 listens with inserted_ts base+6 to base+10
    self._insert_with_created(listens)

    temp_dir = tempfile.mkdtemp()
    dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        start_time=datetime.utcfromtimestamp(base + 6),
        end_time=datetime.utcfromtimestamp(base + 10),
    )
    self.assertTrue(os.path.isfile(dump_location))
    self.reset_timescale_db()

    self.logstore.import_listens_dump(dump_location)

    listens, min_ts, max_ts = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=base + 11)
    # the dump covers inserted_ts base+6 to base+10, but end_time is exclusive
    # of the last listen, so only 4 of the 5 listens come back
    self.assertEqual(len(listens), 4)
    self.assertEqual(listens[0].ts_since_epoch, base + 5)
    self.assertEqual(listens[1].ts_since_epoch, base + 4)
    self.assertEqual(listens[2].ts_since_epoch, base + 3)
    self.assertEqual(listens[3].ts_since_epoch, base + 2)

    shutil.rmtree(temp_dir)

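# The test above calls `self._insert_with_created`, which is not shown in this
# section. A minimal sketch of what such a helper could look like, assuming a
# Timescale `listen` table with (listened_at, track_name, user_name, data,
# created) columns, a `Listen.to_timescale()` serializer, and a
# `listenbrainz.db.timescale` module exposing the engine; all of these names
# are assumptions, not confirmed by this section.
from psycopg2.extras import execute_values
from listenbrainz.db import timescale  # assumed module exposing the engine

def _insert_with_created(self, listens):
    """ Insert listens while explicitly setting their `created` column. """
    submit = [(*listen.to_timescale(), listen.inserted_timestamp) for listen in listens]
    query = """INSERT INTO listen (listened_at, track_name, user_name, data, created)
                    VALUES %s
               ON CONFLICT (listened_at, track_name, user_name)
                DO NOTHING"""
    conn = timescale.engine.raw_connection()
    with conn.cursor() as curs:
        execute_values(curs, query, submit)
    conn.commit()
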
def test_incremental_dump(self): """ Dump and import listens """ listens = generate_data(1, self.testuser_name, 1, 5) # generate 5 listens with ts 1-5 self.logstore.insert(listens) sleep(1) start_time = datetime.now() sleep(1) listens = generate_data(1, self.testuser_name, 6, 5) # generate 5 listens with ts 6-10 self.logstore.insert(listens) sleep(1) temp_dir = tempfile.mkdtemp() dump_location = self.logstore.dump_listens( location=temp_dir, dump_id=1, start_time=start_time, end_time=datetime.now(), ) sleep(1) self.assertTrue(os.path.isfile(dump_location)) self.reset_influx_db() sleep(1) self.logstore.import_listens_dump(dump_location) sleep(1) listens = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11) self.assertEqual(len(listens), 5) self.assertEqual(listens[0].ts_since_epoch, 10) self.assertEqual(listens[1].ts_since_epoch, 9) self.assertEqual(listens[2].ts_since_epoch, 8) self.assertEqual(listens[3].ts_since_epoch, 7) self.assertEqual(listens[4].ts_since_epoch, 6)
def test_time_range_full_dumps(self):
    base = 1500000000
    listens = generate_data(1, self.testuser_name, base + 1, 5)  # generate 5 listens with ts base+1 to base+5
    self.logstore.insert(listens)
    listens = generate_data(1, self.testuser_name, base + 6, 5)  # generate 5 listens with ts base+6 to base+10
    self.logstore.insert(listens)

    temp_dir = tempfile.mkdtemp()
    dump_location = self.dumpstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        end_time=datetime.utcfromtimestamp(base + 5),
    )
    self.assertTrue(os.path.isfile(dump_location))
    self.reset_timescale_db()

    self.logstore.import_listens_dump(dump_location)
    recalculate_all_user_data()

    listens, min_ts, max_ts = self.logstore.fetch_listens(user=self.testuser, to_ts=base + 11)
    # only the first batch falls before end_time
    self.assertEqual(len(listens), 5)
    self.assertEqual(listens[0].ts_since_epoch, base + 5)
    self.assertEqual(listens[1].ts_since_epoch, base + 4)
    self.assertEqual(listens[2].ts_since_epoch, base + 3)
    self.assertEqual(listens[3].ts_since_epoch, base + 2)
    self.assertEqual(listens[4].ts_since_epoch, base + 1)

    shutil.rmtree(temp_dir)

def _create_test_data(self):
    self.log.info("Inserting test data...")
    self.listen = generate_data(self.testuser_id, 'test', MIN_ID + 1, 1)[0]
    listen = self.listen.to_json()
    # note: the (name, value, time) argument order matches redis-py 2.x's
    # setex; redis-py 3.x swapped it to (name, time, value)
    self._redis.redis.setex(
        'playing_now:' + str(listen['user_id']),
        ujson.dumps(listen).encode('utf-8'),
        self.config.PLAYING_NOW_MAX_DURATION,
    )
    self.log.info("Test data inserted")

def test_create_full_db(self, mock_notify):
    listens = generate_data(1, self.user_name, 1500000000, 5)
    self.listenstore.insert(listens)
    sleep(1)

    # create a full dump
    self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir])
    self.assertEqual(len(os.listdir(self.tempdir)), 1)
    dump_name = os.listdir(self.tempdir)[0]
    mock_notify.assert_called_with(dump_name, 'fullexport')

    # make sure that the dump contains a full listens dump, a public dump,
    # a private dump and a spark dump
    archive_count = 0
    for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
        if file_name.endswith('.tar.xz'):
            archive_count += 1
    self.assertEqual(archive_count, 4)

    # now, remove the old dump and create a new one with the same id
    shutil.rmtree(os.path.join(self.tempdir, dump_name))
    self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir, '--last-dump-id'])
    self.assertEqual(len(os.listdir(self.tempdir)), 1)
    recreated_dump_name = os.listdir(self.tempdir)[0]

    # dump names should be the exact same
    self.assertEqual(dump_name, recreated_dump_name)

    # dump should contain the 4 archives
    archive_count = 0
    for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
        if file_name.endswith('.tar.xz'):
            archive_count += 1
    self.assertEqual(archive_count, 4)

def test_create_incremental_dump_with_id(self):
    # if the dump ID does not exist, it should exit with a -1
    result = self.runner.invoke(dump_manager.create_incremental, ['--location', self.tempdir, '--dump-id', 1000])
    self.assertEqual(result.exit_code, -1)

    # create a base dump entry
    t = int(time.time())
    db_dump.add_dump_entry(t)
    sleep(1)
    self.listenstore.insert(generate_data(1, self.user_name, 1500000000, 5))
    sleep(1)

    # create a new dump ID to recreate later
    dump_id = db_dump.add_dump_entry(int(time.time()))

    # now, create a dump with that specific dump id
    result = self.runner.invoke(dump_manager.create_incremental, ['--location', self.tempdir, '--dump-id', dump_id])
    self.assertEqual(len(os.listdir(self.tempdir)), 1)
    dump_name = os.listdir(self.tempdir)[0]
    created_dump_id = int(dump_name.split('-')[2])
    self.assertEqual(dump_id, created_dump_id)

    # dump should contain the listen and spark archives
    archive_count = 0
    for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
        if file_name.endswith('.tar.xz'):
            archive_count += 1
    self.assertEqual(archive_count, 2)

def test_create_incremental(self, mock_notify):
    # creating an incremental dump should fail, because there is no
    # previous dump to calculate the start time from
    result = self.runner.invoke(dump_manager.create_incremental, ['--location', self.tempdir])
    self.assertEqual(result.exit_code, -1)
    self.assertEqual(len(os.listdir(self.tempdir)), 0)

    base = int(time.time())
    dump_id = db_dump.add_dump_entry(base - 60)
    sleep(1)
    self.listenstore.insert(generate_data(1, self.user_name, base - 30, 5))
    result = self.runner.invoke(dump_manager.create_incremental, ['--location', self.tempdir])
    self.assertEqual(len(os.listdir(self.tempdir)), 1)
    dump_name = os.listdir(self.tempdir)[0]
    mock_notify.assert_called_with(dump_name, 'incremental')

    # created dump ID should be one greater than the previous dump's ID;
    # the dump ID is the third dash-separated field of the dump name
    created_dump_id = int(dump_name.split('-')[2])
    self.assertEqual(created_dump_id, dump_id + 1)

    # make sure that the dump contains a listens dump and a spark dump
    archive_count = 0
    for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
        if file_name.endswith('.tar.xz'):
            archive_count += 1
    self.assertEqual(archive_count, 2)

def _create_test_data(self):
    self.log.info("Inserting test data...")
    self.listen = generate_data(self.testuser_id, MIN_ID + 1, 1)[0]
    listen = self.listen.to_json()
    self._redis.redis.setex(
        'playing_now:' + str(listen['user_id']),
        ujson.dumps(listen).encode('utf-8'),
        self.config.PLAYING_NOW_MAX_DURATION,
    )
    self.log.info("Test data inserted")

def test_create_full_dump_with_id(self):
    self.listenstore.insert(generate_data(1, self.user_name, 1500000000, 5))

    # if the dump ID does not exist, it should exit with a -1
    result = self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir, '--dump-id', 1000])
    self.assertEqual(result.exit_code, -1)
    # make sure no directory was created either
    self.assertEqual(len(os.listdir(self.tempdir)), 0)

    # now, add a dump entry to the database and create a dump with that specific dump id
    dump_id = db_dump.add_dump_entry(int(time.time()))
    result = self.runner.invoke(dump_manager.create_full, ['--location', self.tempdir, '--dump-id', dump_id])
    self.assertEqual(len(os.listdir(self.tempdir)), 1)
    dump_name = os.listdir(self.tempdir)[0]
    created_dump_id = int(dump_name.split('-')[2])
    self.assertEqual(dump_id, created_dump_id)

    # dump should contain the 4 archives
    archive_count = 0
    for file_name in os.listdir(os.path.join(self.tempdir, dump_name)):
        if file_name.endswith('.tar.xz'):
            archive_count += 1
    self.assertEqual(archive_count, 4)

def test_listen_counts_in_cache(self):
    count = self._create_test_data(self.testuser_name)
    self.assertEqual(count, self.logstore.get_listen_count_for_user(self.testuser_name, need_exact=True))
    user_key = '{}{}'.format(self.ns + REDIS_TIMESCALE_USER_LISTEN_COUNT, self.testuser_name)
    self.assertEqual(count, int(cache.get(user_key, decode=False)))

    batch = generate_data(self.testuser_id, self.testuser_name, int(time()), 1)
    self.logstore.insert(batch)
    self.assertEqual(count + 1, int(cache.get(user_key, decode=False)))

def test_listen_counts_in_cache(self):
    count = self._create_test_data(self.testuser_name)
    self.assertEqual(count, self.logstore.get_listen_count_for_user(self.testuser_name, need_exact=True))
    user_key = '{}{}'.format(REDIS_INFLUX_USER_LISTEN_COUNT, self.testuser_name)
    self.assertEqual(count, int(cache.get(user_key, decode=False)))

    batch = generate_data(self.testuser_id, self.testuser_name, int(time.time()), 1)
    self.logstore.insert(batch)
    self.assertEqual(count + 1, int(cache.get(user_key, decode=False)))

def test_time_range_full_dumps(self):
    listens = generate_data(1, self.testuser_name, 1, 5)  # generate 5 listens with ts 1-5
    self.logstore.insert(listens)
    sleep(1)
    between_time = datetime.now()
    sleep(1)
    listens = generate_data(1, self.testuser_name, 6, 5)  # generate 5 listens with ts 6-10
    self.logstore.insert(listens)
    sleep(1)

    temp_dir = tempfile.mkdtemp()
    dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        end_time=between_time,
    )
    spark_dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        end_time=between_time,
        spark_format=True,
    )
    sleep(1)
    self.assertTrue(os.path.isfile(dump_location))
    self.reset_influx_db()
    sleep(1)

    self.logstore.import_listens_dump(dump_location)
    sleep(1)
    listens = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11)
    self.assertEqual(len(listens), 5)
    self.assertEqual(listens[0].ts_since_epoch, 5)
    self.assertEqual(listens[1].ts_since_epoch, 4)
    self.assertEqual(listens[2].ts_since_epoch, 3)
    self.assertEqual(listens[3].ts_since_epoch, 2)
    self.assertEqual(listens[4].ts_since_epoch, 1)
    self.assert_spark_dump_contains_listens(spark_dump_location, 5)

    shutil.rmtree(temp_dir)

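# `assert_spark_dump_contains_listens`, used above and in the two dump tests
# below, is not defined in this section. A plausible sketch, assuming the
# spark dump is a .tar.xz archive of JSON-lines files with one listen per
# line; the archive layout is an assumption based on how the tests use it.
import tarfile

def assert_spark_dump_contains_listens(self, dump_location, expected_listen_count):
    """ Count the listens in a spark format dump and compare to the expected count. """
    listen_count = 0
    with tarfile.open(dump_location, 'r:xz') as tar:
        for member in tar:
            if member.isfile() and member.name.endswith('.json'):
                f = tar.extractfile(member)
                listen_count += sum(1 for line in f if line.strip())
    self.assertEqual(listen_count, expected_listen_count)
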
def test_listen_counts_in_cache(self):
    uid = random.randint(2000, 1 << 31)
    testuser = db_user.get_or_create(uid, "user_%d" % uid)
    testuser_name = testuser['musicbrainz_id']
    count = self._create_test_data(testuser_name)
    user_key = REDIS_USER_LISTEN_COUNT + testuser_name
    self.assertEqual(count, self.logstore.get_listen_count_for_user(testuser_name))
    self.assertEqual(count, int(cache.get(user_key, decode=False) or 0))

    batch = generate_data(uid, testuser_name, int(time()), 1)
    self.logstore.insert(batch)
    self.assertEqual(count + 1, int(cache.get(user_key, decode=False) or 0))

def test_incremental_dumps_listen_with_no_insert_timestamp(self):
    """ Incremental dumps should only consider listens that have an inserted_timestamp. """
    t = datetime.now()
    sleep(1)
    listens = generate_data(1, self.testuser_name, 1, 5)

    # insert these listens into influx, dropping the inserted_timestamp
    # from all but the first
    influx_rows = [listen.to_influx(quote(self.testuser_name)) for listen in listens]
    for row in influx_rows[1:]:
        row['fields'].pop('inserted_timestamp')
    self.logstore.write_points_to_db(influx_rows)
    sleep(1)

    listens_from_influx = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11)
    self.assertEqual(len(listens_from_influx), 5)

    # an incremental dump (with a start time) should skip the listens
    # that have no inserted_timestamp
    temp_dir = tempfile.mkdtemp()
    dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        start_time=t,
        end_time=datetime.now(),
    )
    spark_dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        start_time=t,
        end_time=datetime.now(),
        spark_format=True,
    )
    self.assertTrue(os.path.isfile(dump_location))
    self.reset_influx_db()
    sleep(1)

    self.logstore.import_listens_dump(dump_location)
    sleep(1)
    listens_from_influx = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11)
    # only the first listen kept its inserted_timestamp, so only it is dumped
    self.assertEqual(len(listens_from_influx), 1)
    self.assert_spark_dump_contains_listens(spark_dump_location, 1)

    shutil.rmtree(temp_dir)

def test_full_dump_listen_with_no_insert_timestamp(self):
    """ The production database contains listens with no `inserted_timestamp`,
    so full dumps should always be able to dump those listens as well.
    This test ensures that they do.
    """
    listens = generate_data(1, self.testuser_name, 1, 5)

    # insert these listens into influx, dropping the inserted_timestamp
    # from all but the first
    influx_rows = [listen.to_influx(quote(self.testuser_name)) for listen in listens]
    for row in influx_rows[1:]:
        row['fields'].pop('inserted_timestamp')
    self.logstore.write_points_to_db(influx_rows)
    sleep(1)

    listens_from_influx = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11)
    self.assertEqual(len(listens_from_influx), 5)

    # a full dump (with no start time) should contain all of these listens
    temp_dir = tempfile.mkdtemp()
    dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        end_time=datetime.now(),
    )
    spark_dump_location = self.logstore.dump_listens(
        location=temp_dir,
        dump_id=1,
        end_time=datetime.now(),
        spark_format=True,
    )
    self.assertTrue(os.path.isfile(dump_location))
    self.reset_influx_db()
    sleep(1)

    self.logstore.import_listens_dump(dump_location)
    sleep(1)
    listens_from_influx = self.logstore.fetch_listens(user_name=self.testuser_name, to_ts=11)
    self.assertEqual(len(listens_from_influx), 5)
    self.assert_spark_dump_contains_listens(spark_dump_location, 5)

    shutil.rmtree(temp_dir)

def _create_test_data(self, from_ts=MIN_ID + 1, num_listens=None):
    # default arguments are evaluated once at definition time, so pick the
    # random listen count inside the function to get a fresh value per call
    if num_listens is None:
        num_listens = random.randint(1, 100)
    self.log.info("Inserting test data...")
    test_data = generate_data(self.testuser_id, from_ts, num_listens)
    self.logstore.insert(test_data)
    self.log.info("Test data inserted")
    return from_ts, num_listens

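# Every test in this section relies on `generate_data`, which appears with
# both a (user_id, user_name, from_ts, num_records) and a (user_id, from_ts,
# num_records) signature across the older and newer test files, plus a
# trailing inserted_ts argument in the Timescale tests. A minimal sketch of
# the four-argument variant, assuming the Listen model from
# listenbrainz.listen; timestamps start at from_ts and advance one second per
# listen, matching the "ts 1-5" comments above. The track metadata is made up.
import uuid
from datetime import datetime
from listenbrainz.listen import Listen

def generate_data(test_user_id, user_name, from_ts, num_records):
    test_data = []
    for _ in range(num_records):
        test_data.append(Listen(
            user_id=test_user_id,
            user_name=user_name,
            timestamp=datetime.utcfromtimestamp(from_ts),
            recording_msid=str(uuid.uuid4()),
            data={
                'artist_name': 'Test Artist',
                'track_name': 'Test Track',
                'additional_info': {},
            },
        ))
        from_ts += 1  # one listen per second
    return test_data
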