def test_store_data_from_provider_service(self):
    """Data returned by the fetch service must equal the content persisted in the DataFetch file."""
    from dataproviders.services import fetch_data_from_provider
    from dataproviders.services.initialize_data_providers import InitializeDataProviders
    from metadata.tests import LoadTestData

    test_user = LoadTestData.init_user()
    InitializeDataProviders.load()
    provider_profile = LoadTestData.init_strava_data_provider_profile()
    LoadTestData.create_dummy_provider(provider_profile)
    fetched = fetch_data_from_provider.fetch_data_from_endpoint(
        provider_name=provider_profile.data_provider.provider_name,
        endpoint_name="activity",
        user_pk=test_user.pk)
    fetched = JsonUtils.validate(fetched)
    # Exactly one DataFetch row is expected to have been created by the fetch.
    stored_fetch = DataFetch.objects.get()
    stored = JsonUtils.validate(stored_fetch.read_as_str())
    self.assertEqual(fetched, stored)
def try_get_result_data_or_create_it(self, input_filename, result_data):
    """Return the stored expected-result JSON for *input_filename*.

    When no expectation file exists yet, *result_data* is written as the new
    expectation and FileNotFoundError is raised so a developer must review
    the freshly created file before the test can pass.
    """
    result_file_path = self.build_result_file_path(input_filename)
    try:
        return JsonUtils.read_json_file(result_file_path)
    except FileNotFoundError:
        # First run: persist the candidate result, then fail loudly for review.
        JsonUtils.write_to_file(result_data, result_file_path)
        raise FileNotFoundError(f"Expected result did not exists before but has now been created, "
                                f"check the file {result_file_path} to ensure the result is correct", )
def init_strava_schema_from_file():
    """Build a fresh strava schema from the bundled activities JSON and relate its root classes to foaf."""
    LoadTestData.init_foaf()
    rdf_inst = RdfInstanceService()  # kept: constructed as in the original (possible side effects)
    data_cleaning = DataCleaningService()
    # load the bundled fixture file
    testfile = os.path.join(
        settings.BASE_DIR,
        "MetaDataApi/metadata/tests/data/json/strava_activities.json")
    data = JsonUtils.read_json_file(testfile)
    analyser = JsonAnalyser()
    schema = Schema.create_new_empty_schema("strava")
    user = LoadTestData.init_user()
    LoadTestData.init_profile(user)
    analyser.identify_from_json_data(data, schema, user, parrent_label="activities")
    data_cleaning.relate_root_classes_to_foaf(schema)
    return schema
def test_transform_csv_file(self):
    """CSV rows must be cleaned into a list of dicts keyed by the header columns."""
    csv_file = django_file_utils.convert_str_to_file(self.dummy_csv_string(), filetype=FileType.CSV)
    cleaned = JsonUtils.validate(TransformFilesToData().clean_data_from_file(csv_file))
    expected = [
        {'Name': 'Alex', 'Sex': 'M', 'Age': '41', 'Height_in': '74', 'Weight_lbs': '170'},
        {'Name': 'Bert', 'Sex': 'M', 'Age': '42', 'Height_in': '68', 'Weight_lbs': '166'},
    ]
    self.assertEqual(cleaned, expected)
def test_serialize(self):
    """Serializing a provider with endpoints must yield exactly the expected payload."""
    provider = MockDataProvider.create_data_provider_with_endpoints()
    actual = JsonUtils.validate(DataProviderSerializer(provider).data)
    endpoint1 = {'endpoint_name': 'test1', 'endpoint_url': 'testurl',
                 'request_type': 'GET', 'api_type': 'OauthGraphql'}
    endpoint2 = {'endpoint_name': 'test2', 'endpoint_url': 'testurl',
                 'request_type': 'GET', 'api_type': 'OauthGraphql'}
    expected = {
        'oauth_config': None,
        'http_config': None,
        'endpoints': [endpoint1, endpoint2],
        'icon_image_url': None,
        'provider_name': 'test_provider_name',
        'provider_url': None,
        'api_endpoint': "test_endpoint",
    }
    self.assertEqual(actual, expected)
def get_data_from_csv_file(self, file: ContentFile) -> JsonType:
    """Decode an uploaded CSV file and convert its rows into validated JSON data."""
    with io.TextIOWrapper(file, encoding="utf-8") as text_file:
        # Sniff the dialect, then locate/derive the header before parsing rows.
        dialect = self.get_csv_dialect(text_file)
        head = self.read_csv_start_of_file(text_file, dialect)
        fieldnames, data_start_line_nr = self.get_or_create_fieldnames(head)
        rows = self.get_data_from_csv(text_file, dialect, fieldnames, data_start_line_nr)
        return JsonUtils.validate(rows)
def get_data_providers_from_local_or_remote_file(cls):
    """Load provider definitions from the local JSON file, falling back to AWS on failure.

    A missing local file is the expected fallback path; any other error is
    logged before falling back. Returns the parsed provider data.
    """
    try:
        return JsonUtils.read_json_file(cls.data_providers_filename)
    except FileNotFoundError:
        # Fix: the exception was bound to an unused name; expected when the
        # local cache has not been created yet.
        return cls.get_providers_from_aws()
    except Exception as e:
        logger.error(
            f"another error than fileNotFound has occured Msg: {e}")
        return cls.get_providers_from_aws()
def request_access_token(code, data_provider):
    """Exchange an OAuth authorization code for an access token at the provider.

    Returns the provider's parsed token response.
    Raises OauthRedirectRequestException when the HTTP request fails or the
    response contains no access token.
    """
    data = build_request_access_token_data(code, data_provider)
    url = data_provider.oauth_config.access_token_url
    r = requests.post(url, data=data, allow_redirects=True)
    if not r.ok:
        raise OauthRedirectRequestException("the token request did not return with ok. Reason: %s" % r.reason)
    response_content = JsonUtils.validate(r.content)
    access_token = response_content.get("access_token")
    # BUG FIX: the original `if access_token is "" or None:` parsed as
    # `(access_token is "") or None`, which is effectively never true, so a
    # missing token slipped through. Truthiness catches both None and "".
    if not access_token:
        raise OauthRedirectRequestException("access token was not found in response: %s" % response_content)
    return response_content
def fetch_data_from_endpoint(provider_name, endpoint_name, user_pk):
    """Fetch one endpoint's data on behalf of a user, persist it to file, and return the cleaned data."""
    data_provider = DataProvider.objects.get(provider_name=provider_name)
    user = User.objects.get(pk=user_pk)
    endpoint = _get_endpoint(data_provider, endpoint_name)
    # The user's third-party profile for this provider holds the access token.
    profile = user.data_provider_users.get(data_provider__provider_name=provider_name)
    raw = _fetch_data_from_endpoint(endpoint, profile.access_token)
    cleaned = JsonUtils.clean(raw)
    _save_data_to_file(endpoint, user, cleaned)
    return cleaned
def test_transform_json_file(self):
    """A JSON upload must round-trip unchanged through the file-cleaning pipeline."""
    json_file = self.build_json_file()
    cleaned = JsonUtils.validate(TransformFilesToData().clean_data_from_file(json_file))
    sport_q1 = {'question': 'Which one is correct team name in NBA?',
                'options': ['New York Bulls', 'Los Angeles Kings', 'Golden State Warriros', 'Huston Rocket'],
                'answer': 'Huston Rocket'}
    maths = {'q1': {'question': '5 + 7 = ?', 'options': ['10', '11', '12', '13'], 'answer': '12'},
             'q2': {'question': '12 - 8 = ?', 'options': ['1', '2', '3', '4'], 'answer': '4'}}
    expected = {'quiz': {'sport': {'q1': sport_q1}, 'maths': maths}}
    self.assertEqual(cleaned, expected)
def run_regression_test_on_file(self, file):
    """Upload a local data file, let refinement run, and compare the refined output to the stored expectation."""
    source_file = django_file_utils.create_django_file_from_local(self.build_test_file_path(file))
    upload = baker.make(DataFileUpload.__name__, make_m2m=True,
                        data_file_from_source=source_file,
                        has_been_refined=False)
    # Refinement happens as a side effect of creation; reload to see it.
    upload.refresh_from_db()
    self.assertIsNotNone(upload.refined_data_file)
    refined_bytes = upload.refined_data_file.data_file.read()
    refined_json = JsonUtils.loads(refined_bytes.decode())
    self.assert_result_equals_expected_json(refined_json, file)
def read_data_from_data_provider_json_file(cls, fail_on_file_missing=True):
    """Read and validate the provider JSON file.

    When the file is missing, an empty one is created; then the
    FileNotFoundError is re-raised or [] returned depending on
    *fail_on_file_missing*. An existing-but-empty file is treated the same
    way as a missing one.
    """
    try:
        # Fix: use a context manager so the file handle is always closed
        # (the original `open(...).read()` leaked the handle).
        with open(cls.data_providers_filename) as providers_file:
            data = providers_file.read()
    except FileNotFoundError as e:
        cls.create_empty_provider_file()
        if fail_on_file_missing:
            raise e
        return []
    if not data and fail_on_file_missing:
        raise FileNotFoundError("there is no providers in the file")
    return JsonUtils.validate(data or [])
def test_deserialize(self):
    """Deserializing the full mock payload must validate, save, and re-serialize to the expected dict."""
    payload = MockDataProvider.build_full_data()
    ser = DataProviderSerializer(data=payload)
    self.assertTrue(ser.is_valid(), ser.errors)
    expected = {
        'oauth_config': {
            'scope': ['user.activity'],
            'authorize_url': 'https://account.withings.com/oauth2_user/authorize2',
            'access_token_url': 'https://account.withings.com/oauth2/token',
            'client_id': '123',
            'client_secret': '12345',
        },
        'http_config': {
            'url_encoded_params': {'d': 'a', 'c': 't'},
            'header': {
                'User-Agent': 'Tinder/7.5.3 (iPhone; iOS 10.3.2; Scale/2.00)',
                'X-Auth-Token': '{AuthToken:}',
                'Content-Type': 'application/json',
            },
        },
        'endpoints': [
            {'endpoint_name': 'test1', 'endpoint_url': 'testurl',
             'request_type': 'GET', 'api_type': 'OauthRest'},
            {'endpoint_name': 'test2', 'endpoint_url': 'testurl',
             'request_type': 'GET', 'api_type': 'OauthRest'},
        ],
        'icon_image_url': 'http://someurl.com/image',
        'provider_name': 'test_provider_name',
        'provider_url': None,
        'api_endpoint': '56',
    }
    ser.save()
    actual = JsonUtils.validate(ser.data)
    self.assertDictEqual(actual, expected)
def LoadSchemaAndDataFromDataDump(data_fetch_pk, user_pk):
    """Identify schema objects from a previously fetched data dump and export the schema.

    Marks the DataFetch row as loaded. Returns the identified objects.
    """
    user = User.objects.get(pk=user_pk)
    identify = JsonAnalyser()
    # BUG FIX: DataFetch(data_fetch_pk) built an unsaved in-memory instance
    # (FK fields unset), so the attribute accesses below could not see the
    # stored row; load the persisted object instead.
    data_fetch = DataFetch.objects.get(pk=data_fetch_pk)
    parrent_label = data_fetch.endpoint.endpoint_name
    data_as_str = django_file_utils.convert_file_to_str(
        data_fetch.data_file_from_source.file)
    data_as_json = JsonUtils.validate(data_as_str)
    schema = data_fetch.endpoint.data_provider.schema
    objects = identify.identify_from_json_data(data_as_json, schema, user, parrent_label)
    service = RdfSchemaService()
    service.export_schema_from_db(schema)
    # BUG FIX: the original set `loaded` on a fresh throwaway instance and
    # never saved it, so the flag was silently lost; persist it here.
    data_fetch.loaded = True
    data_fetch.save()
    return objects
def create_data_file(self, data: JsonType, user: User, data_file_source: DataFileSourceBase, label_info=None):
    """Serialize *data* to a JSON file, store it as a DataFile for *user*, and link it to its source object."""
    json_file = django_file_utils.convert_str_to_file(
        JsonUtils.dumps(data), filetype=FileType.JSON)
    data_file_object = DataFile.objects.create(
        data_file=json_file,
        user=user,
        label_info=label_info,
        data_provider=data_file_source.data_provider)
    self._update_source_object(data_file_object, data_file_source)
    return data_file_object
def init_strava_data_from_file():
    """Load the bundled strava activities JSON and identify its objects against the existing strava schema."""
    user = LoadTestData.init_user()
    LoadTestData.init_profile(user)
    analyser = JsonAnalyser()
    # load the bundled fixture file
    testfile = os.path.join(
        settings.BASE_DIR,
        "MetaDataApi/metadata/tests/data/json/strava_activities.json")
    data = JsonUtils.read_json_file(testfile)
    schema = Schema.objects.get(label="strava")
    return analyser.identify_from_json_data(data, schema, parrent_label="activities", owner=user)
def IdentifySchemaFromProviderService(provider_name, endpoint, user_pk):
    """Run schema identification over one or all endpoints of a provider; return the total object count."""
    user = User.objects.get(pk=user_pk)
    analyser = JsonAnalyser()
    provider_profile = user.profile.get_data_provider_profile(provider_name)
    schema = Schema.objects.get(label=provider_name)
    data_provider = DataProvider.objects.get(provider_name=provider_name)
    # "all"/None means every known rest endpoint, otherwise just the given one.
    if endpoint == "all" or endpoint is None:
        endpoints = json.loads(data_provider.rest_endpoints_list)
    else:
        endpoints = [endpoint]
    n_objs = 0
    for ep in endpoints:
        raw = fetch_data_from_provider.fetch_data_from_endpoint(
            ep, provider_profile.access_token, user_pk)
        json_data = JsonUtils.validate(raw)
        parrent_label = BaseMetaDataService.rest_endpoint_to_label(ep)
        identified = analyser.identify_from_json_data(json_data, schema, user, parrent_label)
        n_objs += len(identified)
    return n_objs
def IdentifyDataFromProviderService(provider_name, endpoint, user_pk):
    """Fetch and identify data for one or all endpoints of a provider.

    Returns (rdf_file, schema_nodes) where schema_nodes collects the objects
    identified across every processed endpoint and rdf_file is the RDF export
    of those instances.
    """
    user = User.objects.get(pk=user_pk)
    data_provider = DataProvider.objects.get(provider_name=provider_name)
    identify = JsonAnalyser()
    rdf_service = RdfInstanceService()
    provider_profile = user.profile.get_data_provider_profile(provider_name)
    schema = rdf_service.do_meta_item_exists(Schema(label=provider_name))
    # select which endpoints: "all"/None means every known rest endpoint
    if endpoint == "all" or endpoint is None:
        endpoints = json.loads(data_provider.rest_endpoints_list)
    else:
        endpoints = [endpoint, ]
    # identify objects for each endpoint
    schema_nodes = []
    for endpoint in endpoints:
        data = fetch_data_from_provider.fetch_data_from_endpoint(
            endpoint, provider_profile.access_token, user_pk)
        json_data = JsonUtils.validate(data)
        parrent_label = BaseMetaDataService.rest_endpoint_to_label(endpoint)
        objects = identify.identify_from_json_data(json_data, schema, parrent_label)
        schema_nodes.extend(objects)
    # BUG FIX: export every collected node; the original passed `objects`,
    # which held only the LAST endpoint's results (and raised NameError when
    # the endpoint list was empty).
    rdf_file = rdf_service.export_instances_to_rdf_file(schema, schema_nodes)
    return rdf_file, schema_nodes
def build_test_data_file(self):
    """Build a Django JSON file containing the canned strava activity fetch payload."""
    payload = JsonUtils.dumps(data_fetch_json_strava_activity())
    return django_file_utils.convert_str_to_file(
        payload, filetype=django_file_utils.FileType.JSON)
def read_file_from_aws(cls, file_name: str):
    """Download *file_name* from AWS and return its content parsed as JSON."""
    session = cls.get_aws_session()
    raw_text = cls.read_aws_file(file_name, session)
    return JsonUtils.validate(raw_text)
def dummy_json_string(self):
    """Return the dummy JSON data structure serialized to a string."""
    structure = self.dummy_json_data_structure()
    return JsonUtils.dumps(structure)
def try_read_json_file(self, filename):
    """Attempt to read *filename* as JSON for its side effects; a missing file is silently ignored."""
    try:
        JsonUtils.read_json_file(filename)
    except FileNotFoundError:
        # A missing file is an acceptable outcome here.
        pass
def build_header(self):
    """Parse the stored header JSON into a dict.

    Deliberately best-effort: any parse failure (missing, malformed, or
    non-mapping header) yields an empty dict rather than an error.
    """
    try:
        return dict(JsonUtils.loads(self.header))
    except Exception:
        # Fix: drop the unused exception binding; keep the broad catch since
        # a bad header must never break request building.
        return {}
def build_scopes_string(self):
    """Join the configured OAuth scopes into one space-separated string.

    Returns None when no scope is configured. A non-list scope value is
    assumed to be a JSON-encoded list and is parsed before joining.
    """
    if not self.scope:
        return None
    if isinstance(self.scope, list):
        scopes = self.scope
    else:
        # stored as a string: decode the JSON list first
        scopes = JsonUtils.loads(str(self.scope))
    return " ".join(scopes)
def request_refresh_token(data, url):
    """POST a refresh-token request to *url* and return the validated JSON body.

    Raises OauthRedirectRequestException when the provider responds non-OK.
    """
    response = requests.post(url, data=data, allow_redirects=True)
    if not response.ok:
        raise OauthRedirectRequestException("the token request did not return with ok. Reason: %s" % response.reason)
    return JsonUtils.validate(response.content)
def get_data_from_json_file(self, file: ContentFile) -> JsonType:
    """Read an uploaded JSON file and return its validated content."""
    raw = convert_file_to_str(file)
    return JsonUtils.validate(raw)
def read_data(self):
    """Read the attached data file and parse its content as JSON."""
    raw = self.data_file.read()
    return JsonUtils.loads(raw)
def loadStravaActivities(cls):
    """Load the bundled strava activities fixture JSON from disk and return it."""
    fixture_path = os.path.join(
        settings.BASE_DIR,
        "metadata/tests/data/json/strava_activities.json")
    return JsonUtils.read_json_file(fixture_path)
def write_data_to_json_file(cls, data, filename=data_providers_filename):
    """Serialize *data* as JSON and write it to *filename* (the provider file by default)."""
    serialized = JsonUtils.dumps(data)
    with open(filename, 'w+') as out_file:
        out_file.write(serialized)