def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
             aws_bucket_name='cvae-insights', local_data_store=False,
             deployment_prefix='dev', model_version='2019-01-03'):
    """Create an instance of GetData.

    :param aws_access_key_id: explicit AWS key id; falls back to the
        AWS_S3_ACCESS_KEY_ID environment variable when omitted.
    :param aws_secret_access_key: explicit AWS secret; falls back to the
        AWS_S3_SECRET_ACCESS_KEY environment variable when omitted.
    :param aws_bucket_name: S3 bucket that holds the model data.
    :param local_data_store: when True, read from the bundled test fixtures
        instead of connecting to S3.
    :param deployment_prefix: deployment namespace (e.g. 'dev').
    :param model_version: dated model version used to build data paths.
    """
    # BUG FIX: the credential parameters were previously accepted but
    # ignored -- the environment variables always won.  Explicit arguments
    # now take precedence, with the env vars as the (unchanged) fallback.
    self.aws_access_key_id = (
        aws_access_key_id or os.environ.get('AWS_S3_ACCESS_KEY_ID', ''))
    self.aws_secret_access_key = (
        aws_secret_access_key or os.environ.get('AWS_S3_SECRET_ACCESS_KEY', ''))
    self.github_token = os.environ.get('GITHUB_TOKEN', '')
    self.bucket_name = aws_bucket_name
    self.deployment_prefix = deployment_prefix
    self.version_name = model_version
    if local_data_store:
        self.s3_client = LocalDataStore('tests/test_data')
    else:
        self.s3_object = AmazonS3(
            bucket_name=self.bucket_name,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key)
        self.s3_client = self.load_S3()
    self.utility = Utility()
def __init__(self, *args, **kwargs):
    """Set up the local data store, HPF scorers and empty lookup dicts."""
    super(TestHPFScoringMethods, self).__init__(*args, **kwargs)
    data_store = LocalDataStore("tests/test_data")
    self.local_obj = data_store
    self.hpf_obj = HPFScoring(data_store)
    self.hpf_obj_feedback = HPFScoring(data_store)
    # Lookup tables populated lazily by the individual tests.
    self.package_id_dict = OrderedDict()
    self.id_package_dict = OrderedDict()
    self.manifest_id_dict = OrderedDict()
def local_data_store(request, tmp_dir):
    """Pytest fixture: copy the bundled test data into *tmp_dir* and serve it."""
    store = LocalDataStore(tmp_dir)
    # The test data lives one directory above this file, under "data".
    source_dir = Path(__file__).resolve().parents[1].joinpath("data").absolute()
    dir_util.copy_tree(source_dir, tmp_dir)

    def teardown():
        # Drop the reference so the store can be collected after the test.
        nonlocal store
        del store

    request.addfinalizer(teardown)
    return store
class TestPMFRecommendation(TestCase):
    """Test the core recommendations task."""

    def setUp(self):
        """Instantiate the resources required for the tests."""
        self.fs = LocalDataStore('tests/test_data')
        self.assertTrue(self.fs.get_name().endswith('tests/test_data'))
        self.pmf_rec = PMFRecommendation(2, data_store=self.fs, num_latent=5)

    def test__find_closest_user_in_training_set(self):
        """Test if we are getting correct "closest user" from the training set."""
        # A stack that matches a training manifest exactly.
        user = self.pmf_rec._find_closest_user_in_training_set(
            [17190, 14774, 15406, 16594, 29063])
        self.assertIsNotNone(user)
        # A partial overlap should still resolve to some user.
        user = self.pmf_rec._find_closest_user_in_training_set(
            [17190, 14774, 15406])
        self.assertIsNotNone(user)
        # Packages unseen during training resolve to no user at all.
        self.assertIsNone(
            self.pmf_rec._find_closest_user_in_training_set([3, 4]))

    def test__sigmoid(self):
        """Test if the sigmoid function is behaving correctly."""
        self.assertEqual(self.pmf_rec._sigmoid(0), 0.5)

    def test_predict(self):
        """Test the prediction flow."""
        # A fresh stack: nothing missing, exactly two recommendations.
        missing, recommendation, ptm = self.pmf_rec.predict(['pon-logger'])
        self.assertFalse(missing)
        self.assertEqual(len(recommendation), 2)
        # An unknown package is reported as missing, but because the number
        # of missing packages equals the number of known ones we still get
        # recommendations back.
        missing, recommendation, _ = self.pmf_rec.predict(
            ['pon-logger', 'missing'])
        self.assertTrue(missing)
        self.assertGreater(len(recommendation), 0)
        # Only unknown packages -> the package/tag map comes back empty.
        missing, _, package_tag_map = self.pmf_rec.predict(['missing'])
        self.assertDictEqual(package_tag_map, {})
        # A stack that was precomputed during training.
        _, recommendation, _ = self.pmf_rec.predict(
            ['async', 'colors', 'request', 'underscore', 'pkginfo'])
        self.assertTrue(recommendation)
# NOTE(review): fragment of the service bootstrap. It begins in the MIDDLE of
# an ``AmazonS3(...)`` call (the ``s3 = AmazonS3(`` opening and the branch it
# belongs to precede this chunk) and ends on a dangling ``@app.get`` decorator
# whose function body follows it. Preserved verbatim; it cannot be reformatted
# or restyled safely without the missing context.
bucket_name=cloud_constants.S3_BUCKET_NAME, # pragma: no cover aws_access_key_id=cloud_constants.AWS_S3_ACCESS_KEY_ID, aws_secret_access_key=cloud_constants.AWS_S3_SECRET_KEY_ID) s3.connect() elif LOCAL_ACCESS: print("INSIDE LOCAL ACCESS") s3 = AmazonS3(bucket_name=cloud_constants.S3_BUCKET_NAME, aws_access_key_id=cloud_constants.AWS_S3_ACCESS_KEY_ID, aws_secret_access_key=cloud_constants.AWS_S3_SECRET_KEY_ID, endpoint_url=cloud_constants.AWS_S3_ENDPOINT_URL, local_dev=True) s3.connect() else: from rudra.data_store.local_data_store import LocalDataStore # Change the source directory here for local file system testing. s3 = LocalDataStore('tests/test_data/') ScoringParams.num_latent_factors = 5 # This needs to be global as ~200MB of data is loaded from S3 every time an object of this class # is instantiated. recommender = PMFRecommendation(ScoringParams.recommendation_threshold, s3, ScoringParams.num_latent_factors) @app.get('/api/v1/liveness', status_code=200) def liveness(): """Define the linveness probe.""" return {} @app.get('/api/v1/readiness', status_code=200)
# --- Application bootstrap: error reporting, data-store selection, scorer ---
SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
sentry = Sentry(app, dsn=SENTRY_DSN, logging=True, level=logging.ERROR)
app.logger.info('App initialized, ready to roll...')

if cloud_constants.USE_CLOUD_SERVICES:
    # Cloud deployment: model artifacts are read from S3.
    s3_client = AmazonS3(
        bucket_name=cloud_constants.S3_BUCKET_NAME,
        aws_access_key_id=cloud_constants.AWS_S3_ACCESS_KEY_ID,
        aws_secret_access_key=cloud_constants.AWS_S3_SECRET_KEY_ID)
    s3_client.connect()
else:
    from rudra.data_store.local_data_store import LocalDataStore
    # Change the source directory here for local file system testing.
    s3_client = LocalDataStore('tests/test_data')

# Module-level scorer shared by all request handlers.
recommender = HPFScoring(num_recommendations=10, data_store=s3_client)

daiquiri.setup(level=os.environ.get('FLASK_LOGGING_LEVEL', logging.INFO))
_logger = daiquiri.getLogger(__name__)


@app.route('/api/v1/liveness', methods=['GET'])
def liveness():
    """Define the liveness probe."""
    return flask.jsonify({}), 200


@app.route('/api/v1/readiness', methods=['GET'])
def readiness():
    """Define the readiness probe."""
    # NOTE(review): the body of this handler is truncated in this view of
    # the source -- only the decorator and signature are visible.
def test_load_rating(self):
    """Test the load_rating method."""
    datastore = LocalDataStore('tests/test_data')
    ratings = load_rating('test_load_rating.txt', datastore)
    expected = [[5409, 2309, 54909, 2054], []]
    self.assertListEqual(ratings, expected)
def __init__(self, *args, **kwargs):
    """Initialise the local data store and the two HPF scoring objects."""
    super(TestHPFScoringMethods, self).__init__(*args, **kwargs)
    data_store = LocalDataStore("tests/test_data")
    self.local_obj = data_store
    self.hpf_obj = HPFScoring(data_store)
    self.hpf_obj_feedback = HPFScoring(data_store)
# NOTE(review): this fragment appears to be the interior of an app-factory /
# init function -- the enclosing ``def`` (owner of the ``global`` statements)
# is not visible in this view of the source.
setup_logging(app.app)
CORS(app.app)

global scoring_status
global scoring_object
global s3_object

if HPF_SCORING_REGION != "":
    if convert_string2bool_env(USE_CLOUD_SERVICES):
        # Cloud deployment: model artifacts are pulled from S3.
        s3_object = AmazonS3(bucket_name=AWS_S3_BUCKET_NAME,
                             aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_S3_SECRET_ACCESS_KEY)
        s3_object.connect()
        app.scoring_object = HPFScoring(datastore=s3_object)
    else:
        # Local/dev deployment: score against the bundled test fixtures.
        app.scoring_object = HPFScoring(LocalDataStore("tests/test_data"))
    app.scoring_status = True
else:
    # No scoring region configured -> no model is loaded and scoring
    # endpoints should refuse work.
    app.scoring_status = False
    current_app.logger.warning("Have not loaded a model for scoring!")


def heart_beat():
    """Handle the / REST API call."""
    return flask.jsonify({"status": "ok"})


def liveness():
    """Define the liveness probe."""
    return flask.jsonify({"status": "alive"})
class GetData:
    """This class defines the S3 Connections viz fetching and storing data."""

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 aws_bucket_name='cvae-insights', local_data_store=False,
                 deployment_prefix='dev', model_version='2019-01-03'):
        """Create an instance of GetData.

        :param aws_access_key_id: explicit AWS key id; falls back to the
            AWS_S3_ACCESS_KEY_ID environment variable when omitted.
        :param aws_secret_access_key: explicit AWS secret; falls back to the
            AWS_S3_SECRET_ACCESS_KEY environment variable when omitted.
        :param aws_bucket_name: S3 bucket that holds the model data.
        :param local_data_store: when True, read from the bundled test
            fixtures instead of connecting to S3.
        :param deployment_prefix: deployment namespace (e.g. 'dev').
        :param model_version: dated model version used to build data paths.
        """
        # BUG FIX: the credential parameters were previously accepted but
        # ignored -- the environment variables always won.
        self.aws_access_key_id = (
            aws_access_key_id or os.environ.get('AWS_S3_ACCESS_KEY_ID', ''))
        self.aws_secret_access_key = (
            aws_secret_access_key
            or os.environ.get('AWS_S3_SECRET_ACCESS_KEY', ''))
        self.github_token = os.environ.get('GITHUB_TOKEN', '')
        self.bucket_name = aws_bucket_name
        self.deployment_prefix = deployment_prefix
        self.version_name = model_version
        if local_data_store:
            self.s3_client = LocalDataStore('tests/test_data')
        else:
            self.s3_object = AmazonS3(
                bucket_name=self.bucket_name,
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key)
            self.s3_client = self.load_S3()
        self.utility = Utility()

    def load_S3(self):
        """Establish the connection with S3 and return the connected client.

        :raises Exception: when the connection cannot be established.
        """
        self.s3_object.connect()
        if self.s3_object.is_connected():
            logger.info("S3 connection established.")
            return self.s3_object
        # BUG FIX: previously raised a bare ``Exception`` with no message.
        raise Exception("Unable to establish a connection with S3.")

    def load_raw_data(self):
        """Load the raw manifest data from the S3 bucket.

        :return: parsed JSON content of ``<version>/data/manifest.json``.
        :raises Exception: re-raises any read failure with its original
            traceback (previously the cause was discarded by a bare
            ``raise Exception``).
        """
        NPM_raw_data_path = os.path.join(self.version_name,
                                         "data/manifest.json")
        try:
            raw_data_dict_ = self.s3_client.read_json_file(NPM_raw_data_path)
            logger.info("Size of Raw Manifest file is: {}".format(
                len(raw_data_dict_)))
            return raw_data_dict_
        except Exception:
            logger.error("Unable to read raw manifest data from %s",
                         NPM_raw_data_path)
            raise

    def load_existing_data(self):
        """Load the node registry dump from S3 bucket.

        :return: dataframe built from
            ``<version>/data/node-package-details-with-url.json``.
        :raises Exception: when the file cannot be read or parsed.
        """
        NPM_clean_json_data_path = os.path.join(
            self.version_name, "data/node-package-details-with-url.json")
        try:
            logger.info("Path Existed")
            existing_data = self.s3_client.read_generic_file(
                NPM_clean_json_data_path)
            existing_df = self.utility.read_json_file(existing_data)
            logger.info("Size of Raw df with url is: {}".format(
                len(existing_df)))
            return existing_df
        except Exception:
            raise Exception("S3 connection error")
def setUp(self):
    """Create the data store and recommender used by every test."""
    store = LocalDataStore('tests/test_data')
    self.assertTrue(store.get_name().endswith('tests/test_data'))
    self.fs = store
    self.pmf_rec = PMFRecommendation(2, data_store=store, num_latent=50)
"""Contains the path constants for S3 and local storage."""
import os

from rudra.data_store.local_data_store import LocalDataStore

# Number of users per training split, read from the environment.
# NOTE(review): os.environ.get returns a *string* when TRAIN_PER_USER is set
# but the *int* 5 otherwise; downstream usage below only calls str(num_users),
# so this works -- confirm before using num_users numerically.
num_users = os.environ.get("TRAIN_PER_USER", 5)
# Dated model version; prefixes every remote path below.
MODEL_VERSION = os.environ.get('MODEL_VERSION', '2019-01-03')

# Local scratch locations used while training, before artifacts are uploaded.
TEMPORARY_PATH = '/tmp/trained-model/'
TEMPORARY_DATA_PATH = '/tmp/data/'
TEMPORARY_MODEL_PATH = '/tmp/intermediate-model/cvae-model/'
TEMPORARY_DATASTORE = LocalDataStore('/tmp')
TEMPORARY_LATENT_PATH = os.path.join(TEMPORARY_MODEL_PATH,
                                     'latent_pretrain_all')
TEMPORARY_SDAE_PATH = os.path.join(TEMPORARY_MODEL_PATH, 'train')
TEMPORARY_CVAE_PATH = os.path.join(TEMPORARY_MODEL_PATH, 'cvae')
TEMPORARY_PMF_PATH = os.path.join(TEMPORARY_MODEL_PATH, 'pmf-packagedata.mat')
TEMPORARY_USER_ITEM_FILEPATH = os.path.join(
    TEMPORARY_DATA_PATH, "packagedata-train-" + str(num_users) + "-users.dat")
TEMPORARY_ITEM_USER_FILEPATH = os.path.join(
    TEMPORARY_DATA_PATH, "packagedata-train-" + str(num_users) + "-items.dat")

# Remote (S3) paths, all rooted at MODEL_VERSION.
USER_ITEM_FILEPATH = os.path.join(
    MODEL_VERSION, 'data', "packagedata-train-" + str(num_users) + "-users.dat")
ITEM_USER_FILEPATH = os.path.join(
    MODEL_VERSION, 'data', "packagedata-train-" + str(num_users) + "-items.dat")
PRECOMPUTED_MANIFEST_PATH = os.path.join(MODEL_VERSION,
                                         "data/manifest_user_data.dat")
PMF_MODEL_PATH = os.path.join(MODEL_VERSION, 'intermediate-model/cvae-model',
                              'pmf-packagedata.mat')
PACKAGE_TO_ID_MAP = os.path.join(MODEL_VERSION,
                                 'trained-model/package_to_index_map.json')
class TestHPFScoringMethods(unittest.TestCase):
    """Test functionalities of hpf scoring."""

    def __init__(self, *args, **kwargs):
        """Initialise the local data store and HPF object."""
        super(TestHPFScoringMethods, self).__init__(*args, **kwargs)
        data_store = LocalDataStore("tests/test_data")
        self.local_obj = data_store
        self.hpf_obj = HPFScoring(data_store)
        self.hpf_obj_feedback = HPFScoring(data_store)
        # Lookup tables populated by test_load_objects.
        self.package_id_dict = OrderedDict()
        self.id_package_dict = OrderedDict()
        self.manifest_id_dict = OrderedDict()

    def test_basic_object(self):
        """Test basic HPF object."""
        assert self.hpf_obj is not None
        assert self.hpf_obj.recommender is not None
        assert self.hpf_obj.m is not None

    # NOTE: a test for match_feedback_manifest used to live here; the
    # feature is parked for now, so the test is omitted until work resumes.

    def test_load_objects(self):
        """Test logic where incoming data is correct or not."""
        raw_packages = self.local_obj.read_json_file(
            HPF_output_package_id_dict)
        package_list = raw_packages.get("package_list", {})
        # Forward map (name -> id) and its inverse (id -> name).
        self.id_package_dict = OrderedDict(
            {index: name for name, index in package_list.items()})
        self.package_id_dict = OrderedDict(package_list)
        raw_manifests = self.local_obj.read_json_file(
            HPF_output_manifest_id_dict)
        self.manifest_id_dict = OrderedDict(
            {key: set(value)
             for key, value in raw_manifests.get("manifest_list", {}).items()})
        self.assertTrue(isinstance(self.package_id_dict, dict))
        self.assertTrue(isinstance(self.id_package_dict, dict))
        self.assertTrue(isinstance(self.manifest_id_dict, dict))

    def test_recommend_known_user(self):
        """Test recommending for a known user (exists in training set)."""
        recommendation, user_id = self.hpf_obj.recommend_known_user(0)
        assert recommendation is not None
        assert user_id is not None

    def test_recommend_new_user(self):
        """Test the fold-in logic where we calculate factors for new user."""
        recommendation, user_id = self.hpf_obj.recommend_new_user([0])
        assert recommendation is not None
        assert user_id is not None

    def test_predict_missing(self):
        """Test no prediction in case of higher than threshold missing package ratio."""
        with app.app.app_context():
            result = self.hpf_obj.predict(['missing-pkg'])
            self.assertFalse(result[0])
            self.assertTrue(result[2])

    def test_model_details(self):
        """Test the basic model details function."""
        expected = "The model will be scored against 12405 Packages, 9523 Manifests."
        print(self.hpf_obj.model_details())
        assert self.hpf_obj.model_details() == expected

    def test_get_sizeof(self):
        """Test static _getsizeof method."""
        size_of_int_in_mb = 2.6702880859375e-05
        assert HPFScoring._getsizeof(1) == "{} MB".format(size_of_int_in_mb)