def pull(ctx: any, data_context: DataContext, remote_url: str, include_collections: Tuple[str], exclude_collections: Tuple[str]):
    """Pull remote data into the local data directory.

    Downloads the remote data tree into a temp staging directory, purges the
    local data, then moves the downloaded content into place. When collection
    filters are given, only matching per-collection files are pulled.

    :param ctx: CLI context used to invoke the purge command
    :param data_context: provides the local data path and URL suffix
    :param remote_url: base URL of the remote data service
    :param include_collections: if non-empty, only these collections are pulled
    :param exclude_collections: collections to skip
    """
    temp_dir_path = Path(__file__).parent.parent / 'output' / 'temp'
    data_url = ResourceLocator.merge_urls(
        base_url=remote_url,
        url=ResourceLocator.append_locator_parameters(
            url=f'/output/data/datadragon{data_context.suffix}/',
            parameters={
                'dir': str(temp_dir_path),
                'verbose': '1',
            }
        )
    )
    data_locator = locator_factory(url=data_url)
    if include_collections or exclude_collections:
        data_paths = data_locator.list()
        filtered_data_paths = []
        for path_name in data_paths:
            # Directory entries end with '/'; only files carry collection data.
            if path_name.endswith('/'):
                continue
            path = Path(path_name)
            # Strip ALL suffixes (e.g. '.bson.gz') to recover the collection name.
            # BUG FIX: the original used path.name[:-len(suffixes)], which for a
            # file with no suffix became path.name[:0] == '' and mis-filtered it.
            suffixes_length = len(''.join(path.suffixes))
            collection_name = path.name[:len(path.name) - suffixes_length]
            if collection_name in exclude_collections:
                continue
            if include_collections and collection_name not in include_collections:
                continue
            filtered_data_paths.append(str(path))
        # One locator per selected file, rooted at the data URL.
        path_locators = {
            p: locator_factory(url=ResourceLocator.merge_urls(
                base_url=data_url,
                url=f'/{p}'
            ))
            for p in filtered_data_paths
        }
        # Download everything BEFORE purging so a failed pull leaves local data intact.
        pulled_paths = {
            p: l.get()
            for p, l in path_locators.items()
        }
        ctx.invoke(
            purge
        )
        for path, pulled_path in pulled_paths.items():
            destination_path = data_context.data_path / path
            os.makedirs(str(destination_path.parent), mode=0o755, exist_ok=True)
            shutil.move(pulled_path, str(destination_path))
            # Remove the now-empty per-file staging directory.
            Path(pulled_path).parent.rmdir()
    else:
        # No filtering: pull the whole tree, purge, then move it into place.
        temp_data_path = Path(data_locator.get())
        ctx.invoke(
            purge
        )
        shutil.move(str(temp_data_path), str(data_context.data_path))
        shutil.rmtree(str(temp_data_path.parent))
def push(data_context: DataContext, remote_url: str):
    """Upload the local data directory to the remote data endpoint."""
    remote_data_url = ResourceLocator.merge_urls(
        base_url=remote_url,
        url=f'/output/data/datadragon{data_context.suffix}/?locator=1&verbose=1',
    )
    remote_locator = locator_factory(url=remote_data_url)
    # A trailing separator marks the resource as a directory upload.
    local_directory = os.path.join(str(data_context.data_path), '')
    remote_locator.put(resource=local_directory)
def files_push(files_context: FilesContext, remote_url: str):
    """Upload the local files directory to the remote files endpoint."""
    remote_files_url = ResourceLocator.merge_urls(
        base_url=remote_url,
        url=f'/output/files/files{files_context.suffix}/?locator=1&verbose=1',
    )
    remote_locator = locator_factory(url=remote_files_url)
    # A trailing separator marks the resource as a directory upload.
    local_directory = os.path.join(str(files_context.files_path), '')
    remote_locator.put(resource=local_directory)
def register_decryptors(access_credentials: Dict[str, any]):
    """Register a Decryptor for every named private-key URL under 'secrets'.

    Each secret URL is fetched for its key bytes; the key password is taken
    from the URL's 'password' locator parameter.
    """
    for secret_name, key_url in access_credentials['secrets'].items():
        key_locator = locator_factory(url=key_url)
        key_bytes = key_locator.get()
        key_password = key_locator.get_locator_parameter(parameter='password')
        Decryptor.register_decryptor(
            decryptor=Decryptor(
                private_key=key_bytes,
                password=key_password.encode(),
                name=secret_name,
            )
        )
def configure_encryption(self):
    """Register a Decryptor for each named private key under the 'encrypt' configuration."""
    for key_name, key_url in self.configuration['encrypt'].items():
        key_locator = locator_factory(url=key_url)
        # The key bytes come from the URL body; the password rides as a locator parameter.
        key_bytes = key_locator.get()
        key_password = key_locator.get_locator_parameter(parameter='password')
        Decryptor.register_decryptor(decryptor=Decryptor(
            private_key=key_bytes,
            password=key_password.encode(),
            name=key_name,
        ))
def prepare_map_context(self) -> Dict[str, any]:
    """Return the static map context merged with graphs loaded from each context URL.

    Later URLs override earlier keys; all URLs override the static context.
    """
    merged_context = dict(self.map_context)
    for context_url in self.map_context_urls:
        contents = locator_factory(url=context_url).get()
        if isinstance(contents, bytes):
            contents = contents.decode()
        # Strict mode uses the stdlib parser; otherwise commented JSON is accepted.
        parse = json.loads if self.strict_json else JsonComment().loads
        merged_context.update(parse(contents))
    return merged_context
def run(self, resource: Dict[str, any]):
    """Resolve a shared-resource URL reference inside *resource*.

    If the resource has no URL property, it is returned untouched. Otherwise
    the URL is fetched; when the locator carries an 'encrypt' parameter, the
    matching private key is first registered inside a temporary (local)
    registry so it does not leak into the global decryptor registry. An
    optional override dict is merged over the retrieved resource.

    :param resource: dict possibly containing url/encrypt/override properties
    :returns: the retrieved (and possibly overridden) shared resource
    :raises AssertionError: on missing/invalid encryption or override properties
    """
    if self.url_key not in resource:
        # Nothing to resolve; pass the resource through unchanged.
        return resource
    shared_resource_locator = locator_factory(url=resource[self.url_key])
    self._check_scheme(locator=shared_resource_locator)
    encrypt = shared_resource_locator.get_locator_parameter('encrypt')
    if encrypt:
        # FIX: corrected typos in assertion messages ('resouce' -> 'resource',
        # 'overrrides' -> 'overrides' below).
        assert self.encrypt_key in resource, f'Shared resource missing property {self.encrypt_key} for encrypted URL'
        with Cryptor.local_registries():
            locator = locator_factory(url=resource[self.encrypt_key])
            self._check_scheme(locator=locator)
            private_key_bytes = locator.get()
            assert isinstance(
                private_key_bytes, bytes
            ), f'Shared resource property {self.encrypt_key} must be a URL retrieving binary private key bytes'
            password = locator.get_locator_parameter(self.encrypt_password_parameter)
            decryptor = Decryptor(private_key=private_key_bytes, password=password.encode(), name=encrypt)
            Decryptor.register_decryptor(decryptor=decryptor)
            # Retrieve while the decryptor is registered in the local registry.
            shared_resource = shared_resource_locator.get()
    else:
        shared_resource = shared_resource_locator.get()
    if self.override_key in resource:
        assert isinstance(
            resource[self.override_key], dict
        ), f'Shared resource override property {self.override_key} must be of type dict'
        assert isinstance(
            shared_resource, dict
        ), f'Shared resource must be of type dict to support overrides in property {self.override_key}'
        # Override values win over the retrieved resource's values.
        shared_resource = {
            **shared_resource,
            **resource[self.override_key],
        }
    return shared_resource
def files_pull(ctx: any, files_context: FilesContext, remote_url: str):
    """Replace the local files directory with the remote files tree."""
    staging_dir = Path(__file__).parent.parent / 'output' / 'temp'
    # Build the remote URL, instructing the locator to download into staging.
    pull_url = ResourceLocator.merge_urls(
        base_url=remote_url,
        url=ResourceLocator.append_locator_parameters(
            url=f'/output/files/files{files_context.suffix}/',
            parameters={
                'dir': str(staging_dir),
                'verbose': '1',
            }
        )
    )
    downloaded_path = Path(locator_factory(url=pull_url).get())
    # Purge only after the download succeeded, then move the fresh copy into place.
    ctx.invoke(
        files_purge
    )
    shutil.move(str(downloaded_path), str(files_context.files_path))
    shutil.rmtree(str(downloaded_path.parent))
def run(self, credentials: Dict[str, any]):
    """Handle an OAuth redirect callback.

    Validates the API key and the signed, enciphered state, exchanges the
    auth code for an access token, and stores the (encrypted) token response
    in the credential store. Returns a pre-built error response on any
    validation failure, or self.success_response on success.
    """
    # NOTE(review): utcnow() is naive, and datetime.timestamp() interprets naive
    # values as LOCAL time — so this expiry comparison is skewed by the host's UTC
    # offset unless state['expire'] was produced the same way. Confirm against the
    # state generator before changing.
    now = datetime.utcnow()
    if self.event['pathParameters']['api_key'] not in credentials['api_keys']:
        return self.api_key_error_response
    raw_state = self.event['queryStringParameters']['state']
    # Verify the signature BEFORE deciphering — reject tampered state early.
    if not self.verify_state_signature(
            raw_state=raw_state,
            initialization_vector=credentials['initialization_vector']):
        return self.invalid_state_error_response
    state = self.decipher_state(
        raw_state=raw_state,
        cipher_key=credentials['cipher_key'],
        initialization_vector=credentials['initialization_vector'])
    if state['expire'] < datetime.timestamp(now):
        return self.invalid_state_error_response
    # Only app IDs with a known secret may complete the exchange.
    if state['app_id'] not in credentials['app_secrets']:
        return self.invalid_state_error_response
    response_locator = self.create_response_locator(
        name=state['name'], store_url=credentials['store_url'])
    # The response locator's 'encrypt' parameter names the certificate to use.
    encryptor_name = response_locator.get_locator_parameter('encrypt')
    self.register_encryptor(
        certificate_url=credentials['encryptors'][encryptor_name],
        name=encryptor_name)
    response = self.retrieve_access_token(
        app_id=state['app_id'],
        app_secret=credentials['app_secrets'][state['app_id']],
        code=self.event['queryStringParameters']['auth_code'])
    if response is None:
        return self.invalid_code_error_response
    store_locator = locator_factory(url=credentials['store_url'])
    store_list = store_locator.list()
    # Refuse to overwrite an already-stored response for this state name.
    if store_list and f'{state["name"]}.json' in store_list:
        return self.duplicate_state_error_response
    response_locator.put(resource=json.dumps(response).encode())
    return self.success_response
def refresh_tokens():
    """Refresh the stored TikTok OAuth tokens and return the refresh API payload.

    Loads local access credentials, registers their decryptors, fetches the
    remote credentials, exchanges the refresh token, and writes the updated
    credentials back to the same locator.
    """
    credentials_file = Path(__file__).parent / 'local_access_credentials.json'
    with open(credentials_file) as f:
        access_credentials = json.load(f)
    register_decryptors(access_credentials=access_credentials)
    credentials_locator = locator_factory(
        url=access_credentials['credentials_url'])
    credentials = json.loads(credentials_locator.get().decode())
    response = requests.post(
        'https://ads.tiktok.com/open_api/oauth2/refresh_token/',
        json={
            'app_id': credentials['app_id'],
            'secret': credentials['secret'],
            'grant_type': 'refresh_token',
            'refresh_token': credentials['refresh_token'],
        })
    token_data = response.json()['data']
    # Persist the rotated tokens, keeping every other credential field intact.
    refreshed = {
        **credentials,
        'refresh_token': token_data['refresh_token'],
        'access_token': token_data['access_token'],
    }
    credentials_locator.put(
        resource=json.dumps(refreshed, indent=2).encode())
    return token_data
def api_test_rule(data_dragon: DataDragon, test_url: Optional[str], credential_url: Optional[str], rule_url: Optional[str], user_id: Optional[str], rule_id: Optional[str], channel: Optional[str], from_date: Optional[str], to_date: Optional[str], granularity: Optional[str]):
    """Run an end-to-end API test of a rule.

    Builds a test configuration from a test URL/channel plus CLI overrides,
    optionally creates a test user, credential, and rule, performs a live run
    followed by a dry run, and asserts the two runs produced matching actions.

    :raises AssertionError: when required options are missing or the live and
        dry-run action sets disagree
    """
    assert channel is not None or test_url is not None or rule_url is not None or rule_id is not None, 'One of --channel --test-url, --rule-url, or --rule-id is required'
    data_dragon.configure_encryption()
    if test_url is None and channel is not None:
        # Fall back to the bundled per-channel test configuration if present.
        test_path = Path(__file__).parent.parent / 'input' / 'test' / 'rule' / f'test_{channel}.json'
        test_url = str(test_path) if test_path.exists() else None
    # BUG FIX: the fallback must be the JSON text '{}' — the original used a dict
    # literal {}, which json.loads() below cannot parse (TypeError).
    test_configuration = locator_factory(url=test_url).get().decode() if test_url is not None else '{}'
    # CLI options override the loaded configuration; None-valued options are dropped.
    test = io_pruned_structure({
        **json.loads(test_configuration),
        **({'credential_url': credential_url} if credential_url is not None else {}),
        **({'rule_id': rule_id} if rule_id is not None else {}),
        **({'user_id': user_id} if user_id is not None else {}),
        **({'rule_url': rule_url} if rule_url is not None else {}),
        **({'channel': channel} if channel is not None else {}),
        **({'from_date': from_date} if from_date is not None else {}),
        **({'to_date': to_date} if to_date is not None else {}),
        **({'granularity': granularity} if granularity is not None else {}),
    })
    test_format = Format().bold().cyan()
    data_dragon.user.present_message(test_format(f'††† Running test configuration\n{json.dumps(test, indent=2)}'))
    if 'channel' in test:
        # Default the credential and rule sources for the channel when not given.
        if 'credential_url' not in test:
            test['credential_url'] = f'alias://credentials/test/test_{test["channel"]}.{"zip" if test["channel"] == "apple_search_ads" else "json"}'
        if 'rule_url' not in test and 'rule_id' not in test:
            test['rule_url'] = str(Path(__file__).parent.parent / 'input' / 'test' / 'rule' / f'test_{test["channel"]}_rule.json')
    run_context = APIRunContext(data_dragon=data_dragon)
    password = data_dragon.generate_password()
    if 'user_id' not in test:
        user = run_context.run_api_command(
            command=['user', 'create'],
            command_args=[
                '-q', '-t', '-w', password,
                '{"local":{"email":"*****@*****.**"},"name":"TestUser"}',
            ],
            load_output=True
        )
        data_dragon.user.present_message(test_format(f'††† Created test user {user["_id"]}'))
    else:
        user = {'_id': test['user_id']}
    if 'credential_url' in test:
        if channel == 'apple_search_ads':
            # Apple requires a certificate file; stage it in a temp file for upload.
            credential_json = '{"name":"AppleTestCredential","target":"apple_search_ads"}'
            certificate_locator = locator_factory(url=test['credential_url'])
            certificate_locator.safe = False
            certificate_contents = certificate_locator.get()
            certificate_fd, certificate_file_path = tempfile.mkstemp(prefix=str(Path(__file__).parent.parent / 'output' / 'temp' / 'test_'))
            try:
                os.write(certificate_fd, certificate_contents)
                os.close(certificate_fd)
                credential = run_context.run_api_command(
                    command=['credential', 'create'],
                    command_args=[
                        '-q', '-t', '-u', user['_id'],
                        '-c', certificate_file_path,
                        credential_json,
                    ],
                    load_output=True
                )
            finally:
                Path(certificate_file_path).unlink()
        else:
            credential_json = locator_factory(url=test['credential_url']).get().decode()
            credential = run_context.run_api_command(
                command=['credential', 'create'],
                command_args=[
                    '-q', '-t', '-u', user['_id'],
                    credential_json,
                ],
                load_output=True
            )
        data_dragon.user.present_message(test_format(f'††† Created test credential {credential["_id"]}'))
    else:
        credential = None
    if 'rule_url' in test and 'rule_id' not in test:
        rule_locator = locator_factory(url=test['rule_url'])
        rule_json = rule_locator.get().decode()
        rule = run_context.run_api_command(
            command=['rule', 'create'],
            command_args=[
                '-q', '-t',
                *([
                    '-u', user['_id'],
                    '-c', credential['path'],
                ] if credential is not None else []),
                rule_json,
            ],
            load_output=True
        )
        data_dragon.user.present_message(test_format(f'††† Created {channel} test rule {rule["_id"]}'))
    elif 'rule_id' in test:
        rule = {'_id': test['rule_id']}
    else:
        rule = {}
    data_dragon.user.present_message(test_format(f'††† Performing live run of {channel} test rule {rule["_id"]}'))
    run_overrides = [
        *(['-g', test['granularity']] if 'granularity' in test else []),
        *(['-f', test['from_date']] if 'from_date' in test else []),
        *(['-t', test['to_date']] if 'to_date' in test else []),
    ]
    run_context.run_api_command(
        command=['rule', 'run'],
        command_args=[
            *run_overrides,
            '--allow-non-dry-run',
            rule['_id'],
        ],
    )
    data_dragon.user.present_message(test_format(f'††† Retrieving live actions from {channel} test rule {rule["_id"]}'))
    history = run_context.run_api_command(
        command=['rule', 'show-history'],
        command_args=[
            '-q',
            rule['_id'],
        ],
        load_output=True
    )
    actions = list(filter(lambda h: h['historyType'] == 'action', history))
    assert actions, 'No actions in test rule history'

    def check_apple_search_ads_actions(actions: List[Dict[str, any]]):
        # Parse numeric adjustment values out of the description and require a change.
        for action in actions:
            match = re.search(r'from ([^ ]+) to ([^ ]+)', action['actionDescription'])
            action['adjustmentFrom'] = float(match.group(1))
            action['adjustmentTo'] = float(match.group(2))
            assert action['adjustmentFrom'] != action['adjustmentTo'], f'No adjustment made for action {action}'

    if channel == 'apple_search_ads':
        check_apple_search_ads_actions(actions)
    data_dragon.user.present_message(test_format(f'††† Clearing live actions for {channel} test rule {rule["_id"]}'))
    run_context.run_api_command(
        command=['rule', 'clear-history'],
        command_args=[
            rule['_id'],
        ],
    )
    data_dragon.user.present_message(test_format(f'††† Performing dry run of {channel} test rule {rule["_id"]}'))
    run_context.run_api_command(
        command=['rule', 'run'],
        command_args=[
            '-g', 'DAILY',
            '-f', '2020-05-01',
            '-t', '2020-05-07',
            rule['_id'],
        ],
    )
    data_dragon.user.present_message(test_format(f'††† Retrieving dry run actions from {channel} test rule {rule["_id"]}'))
    dry_run_history = run_context.run_api_command(
        command=['rule', 'show-history'],
        command_args=[
            '-q',
            rule['_id'],
        ],
        load_output=True
    )
    dry_run_actions = list(filter(lambda h: h['historyType'] == 'action', dry_run_history))
    if channel == 'apple_search_ads':
        check_apple_search_ads_actions(dry_run_actions)

    def check_live_and_dry_actions(actions: List[Dict[str, any]], dry_run_actions: List[Dict[str, any]]):
        def check_adjustment_difference(actual: any, expected: any):
            # Floats match if equal, or within both an absolute AND relative 0.001 tolerance.
            # BUG FIX: the original parenthesization compared the boolean result of
            # 'and' against 0.001 — so whenever the absolute test failed, the
            # expression became False < 0.001 == True, accepting any difference.
            if type(actual) is float and type(expected) is float:
                return actual == expected or (abs(expected - actual) < 0.001 and abs((expected - actual) / expected) < 0.001)
            return actual == expected
        assert len(actions) == len(dry_run_actions), f'{len(actions)} action count does not match {len(dry_run_actions)} dry run action count'
        for action in actions:
            dry_actions = list(filter(lambda a: a['targetType'] == action['targetType'] and a['targetID'] == action['targetID'], dry_run_actions))
            assert dry_actions, f'No matching dry run action found for target {action["targetType"]} {action["targetID"]}'
            assert check_adjustment_difference(dry_actions[0]['adjustmentFrom'], action['adjustmentTo']), f'Dry run found {action["targetType"]} {action["targetID"]} in state {dry_actions[0]["adjustmentFrom"]}, which does not match live adjusment to state {action["adjustmentTo"]}'

    check_live_and_dry_actions(actions, dry_run_actions)
    adjustment_output = '\n'.join(f'{a["targetType"]} {a["targetID"]} {a["adjustmentType"]} from {a["adjustmentFrom"]} to {a["adjustmentTo"]}' for a in actions)
    data_dragon.user.present_message(test_format(f'††† Finished test with {channel} test rule {rule["_id"]}\nConfiguration:\n{json.dumps(test, indent=2)}\nAdjustments:\n{adjustment_output}'))
def migrate(data_dragon: DataDragon, data_context: DataContext, from_version: str, to_version: str):
    """Run the migration assets named '{from_version}__to__{to_version}'.

    A migration may consist of any combination of: a JSON query file (run as
    SQL after confirmation), a Python script (run in the user's scripting
    environment), and a JavaScript file (run via node after confirmation).
    At least one of the three must exist.
    """
    script_name = f'{from_version}__to__{to_version}'
    query_path = Path(__file__).parent.parent / 'migrate' / 'queries' / f'{script_name}.json'
    script_path = Path(__file__).parent.parent / 'migrate' / 'scripts' / f'{script_name}.py'
    javascript_path = Path(__file__).parent.parent / 'migrate' / 'scripts' / f'{script_name}.js'
    assert query_path.exists() or script_path.exists() or javascript_path.exists()
    queries = []
    if query_path.exists():
        query_url = f'{query_path}?locator=1&encoding=utf-8'
        query_locator = locator_factory(url=query_url)
        query_text = query_locator.get()
        queries.append(SQL.Query(query=query_text))
    layer = SQL.Layer()
    layer.connect()

    # Logs the substituted SQL, asks the user for confirmation (unless
    # confirmation is None), and runs the query on the shared layer.
    # Returns the query result, or None when the user declines.
    def run_query(query: SQL.Query, confirmation: Optional[str]='Run this migration query?') -> Optional[any]:
        log.log(query.substituted_query)
        if confirmation is not None and not data_dragon.user.present_confirmation(
            confirmation.format(query=query),
            default_response=True
        ):
            return None
        return query.run(sql_layer=layer)

    if script_path.exists():
        # NOTE(review): assigns to user.locals while spreading user.python_locals —
        # presumably 'locals' is the environment run_script executes in; confirm
        # these two attributes are intentionally distinct.
        data_dragon.user.locals = {
            **data_dragon.user.python_locals,
            'log': log,
            'user': data_dragon.user,
            'SQL': SQL,
            'layer': layer,
            'queries': queries,
            'run_query': run_query,
            'db': layer.get_database(),
        }
        data_dragon.user.script_directory_components = [
            'migrate',
            'scripts',
        ]
        data_dragon.user.run_script(script_name=script_name)
    if javascript_path.exists():
        # Show the full script text before asking to run it with node.
        if data_dragon.user.present_confirmation(
            f'{javascript_path.read_text()}\n\nRun this script from {javascript_path}',
            default_response=True
        ):
            process.call_process(run_args=[
                'node',
                str(javascript_path),
            ])
    # Queries run last so a script may have appended to (or consumed) the list.
    for index, query in enumerate(queries):
        run_query(
            query=query,
            confirmation=f'Run migration query {index + 1} of {len(queries)}?'
        )
    layer.commit()
    layer.disconnect()
def register_encryptor(self, certificate_url: str, name: str):
    """Fetch the public key at *certificate_url* and register it as a named Encryptor."""
    certificate_bytes = locator_factory(url=certificate_url).get()
    Encryptor.register_encryptor(
        encryptor=Encryptor(public_key=certificate_bytes, name=name)
    )
def create_response_locator(self, name: str, store_url: str):
    """Return a locator addressing '<name>.json' inside the (dealiased) store URL."""
    resolved_store_url = ResourceLocator.dealias_url(url=store_url)
    target_url = ResourceLocator.join_path(url=resolved_store_url, path=f'{name}.json')
    return locator_factory(url=target_url)