def _reload_collection(self, collection_id):
    """Reloads the demo collection with the given id (dev mode only).

    Args:
        collection_id: str. The collection id.

    Raises:
        Exception. Cannot reload a collection in production.
    """
    if not constants.DEV_MODE:
        raise Exception('Cannot reload a collection in production.')

    logging.info(
        '[ADMIN] %s reloaded collection %s' % (self.user_id, collection_id))
    collection_services.load_demo(
        python_utils.convert_to_bytes(collection_id))
    # The system user is passed as the actor that releases ownership of the
    # freshly loaded collection.
    system_user = user_services.get_system_user()
    rights_manager.release_ownership_of_collection(
        system_user, python_utils.convert_to_bytes(collection_id))
def map(item):
    """Re-saves the given model and reports whether the save succeeded.

    Args:
        item: *. A model instance exposing put() and an id attribute.

    Yields:
        tuple(str, *). ('SUCCESS', 1) if the put succeeded, otherwise
        ('FAILURE', message) where the message names the model class, the
        model id and the error that was raised.
    """
    try:
        # The model is re-saved without touching its last-updated time.
        item.put(update_last_updated_time=False)
    except Exception as e:
        failure_details = (
            item.__class__.__name__,
            item.id,
            python_utils.convert_to_bytes(e))
        yield (
            'FAILURE',
            '%s with id %s failed with error: %s' % failure_details)
        return

    yield ('SUCCESS', 1)
def __init__(self, id_property, model_id, target_kind, target_id):
    """Initializes a new ModelRelationshipError.

    Args:
        id_property: ModelProperty. The property referring to the ID of the
            target model.
        model_id: bytes. The ID of the model with problematic ID property.
        target_kind: str. The kind of model the property refers to.
        target_id: bytes. The ID of the specific model that the property
            refers to. NOTE: This is the value of the ID property.
    """
    # NOTE: IDs are converted to bytes because that's how they're read from
    # and written to the datastore.
    source_kind = id_property.model_kind
    source_id = python_utils.convert_to_bytes(model_id)
    super(ModelRelationshipError, self).__init__(
        source_kind, model_id=source_id)

    missing_target_id = python_utils.convert_to_bytes(target_id)
    message_template = (
        '%s=%r should correspond to the ID of an existing %s, '
        'but no such model exists')
    self.message = message_template % (
        id_property, missing_target_id, target_kind)
def get(self, page_context, page_identifier, asset_type, encoded_filename):
    """Returns an asset file.

    Args:
        page_context: str. The context of the page where the asset is
            required.
        page_identifier: str. The unique identifier for the particular
            context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
        asset_type: str. Type of the asset, either image or audio.
        encoded_filename: str. The asset filename. This string is encoded
            in the frontend using encodeURIComponent().

    Raises:
        PageNotFoundException. The server is not running in dev mode, or
            the asset could not be served (this includes an unsupported
            page_context, whose InvalidInputException is raised inside the
            try block below and therefore converted).
    """
    if not constants.DEV_MODE:
        raise self.PageNotFoundException

    try:
        filename = python_utils.urllib_unquote(encoded_filename)
        file_format = filename[(filename.rfind('.') + 1):]
        # The header value is converted explicitly because an error occurs
        # in the wsgi library when a unicode value gets used.
        self.response.headers['Content-Type'] = (
            python_utils.convert_to_bytes(
                '%s/%s' % (asset_type, file_format)))

        if page_context == feconf.ENTITY_TYPE_SUBTOPIC:
            # Subtopic assets are stored under the topic the subtopic
            # belongs to, so the topic is looked up by its name.
            entity_type = feconf.ENTITY_TYPE_TOPIC
            topic = topic_fetchers.get_topic_by_name(page_identifier)
            entity_id = topic.id
        elif page_context in (
                feconf.ENTITY_TYPE_EXPLORATION,
                feconf.ENTITY_TYPE_SKILL,
                feconf.ENTITY_TYPE_TOPIC,
                feconf.ENTITY_TYPE_STORY):
            entity_type = page_context
            entity_id = page_identifier
        else:
            raise self.InvalidInputException

        fs = fs_domain.AbstractFileSystem(
            fs_domain.DatastoreBackedFileSystem(entity_type, entity_id))
        raw = fs.get('%s/%s' % (asset_type, filename))

        self.response.cache_control.no_cache = None
        self.response.cache_control.public = True
        self.response.cache_control.max_age = 600
        self.response.write(raw)
    except Exception:
        # Only ordinary errors are reported as a missing page. A bare
        # except would also turn SystemExit and KeyboardInterrupt into a
        # 404 response.
        raise self.PageNotFoundException
def commit(self, filepath, raw_bytes, mimetype=None):
    """Replaces the contents of the file with the given content.

    Args:
        filepath: str. The path to the relevant file within the entity's
            assets folder.
        raw_bytes: str. The content to be stored in the file. It is
            converted to bytes before the filepath is checked.
        mimetype: str. The content-type of the file.
    """
    content_as_bytes = python_utils.convert_to_bytes(raw_bytes)
    self._check_filepath(filepath)
    self._impl.commit(filepath, content_as_bytes, mimetype)
def base64_from_int(value):
    # type: (int) -> Text
    """Converts the number into base64 representation.

    The converted value is wrapped in square brackets before it is
    base64-encoded.

    Args:
        value: int. Integer value for conversion into base64.

    Returns:
        *. Returns the base64 representation of the number passed.
    """
    converted_value = python_utils.convert_to_bytes(value) # type: ignore[no-untyped-call]
    bracketed_value = b'[' + converted_value + b']'
    return base64.b64encode(bracketed_value)
def generate_signature(secret, message, vm_id):
    """Generates digital signature for given data.

    The signed payload is the base64-encoded message and the VM id joined
    by a '|' character; it is signed with HMAC-SHA256.

    Args:
        secret: bytes. The secret used to communicate with Oppia-ml.
        message: bytes. The message payload data.
        vm_id: str. The ID of the VM that generated the message.

    Returns:
        str. The signature of the payload data.
    """
    vm_id_bytes = python_utils.convert_to_bytes(vm_id)
    encoded_message = base64.b64encode(message)
    signed_payload = b'%s|%s' % (encoded_message, vm_id_bytes)
    signer = hmac.new(secret, msg=signed_payload, digestmod=hashlib.sha256)
    return signer.hexdigest()
def test_redis_configuration_file_matches_feconf_redis_configuration(self):
    """Tests that the redis configuration file and feconf variables have
    the same port definition.
    """
    redis_conf_path = os.path.join(common.CURR_DIR, 'redis.conf')
    self.assertTrue(os.path.exists(redis_conf_path))

    with python_utils.open_file(redis_conf_path, 'r') as redis_conf:
        first_line = redis_conf.readlines()[0]

    # The first line must hold exactly two whitespace-separated tokens; the
    # second one is compared with the port configured in feconf.
    elements = first_line.split()
    self.assertEqual(len(elements), 2)
    expected_port = python_utils.convert_to_bytes(feconf.REDISPORT)
    self.assertEqual(elements[1], expected_port)
def map(item):
    """Validates the exploration for the given model and reports failures.

    Private explorations are validated non-strictly; all others are
    validated with strict=True. Deleted models are skipped.

    Args:
        item: *. The exploration model to validate.

    Yields:
        tuple(str, *). The model id and the validation error message, only
        if validation fails.
    """
    if item.deleted:
        return

    exploration = exp_fetchers.get_exploration_from_model(item)
    exp_rights = rights_manager.get_exploration_rights(item.id)

    try:
        if exp_rights.status != rights_domain.ACTIVITY_STATUS_PRIVATE:
            exploration.validate(strict=True)
        else:
            exploration.validate()
    except utils.ValidationError as e:
        yield (item.id, python_utils.convert_to_bytes(e))
def get_e2e_suite_names_from_script_travis_yml_file():
    """Extracts the test suite names from the script section of the
    .travis.yml file.

    Returns:
        list(str). An alphabetically-sorted list of names of test suites
        from the script section in the .travis.yml file.
    """
    travis_config = read_and_parse_travis_yml_file()
    script_section = python_utils.convert_to_bytes(travis_config['script'])
    # Suite names are taken from patterns like
    # python -m scripts.run_e2e_tests --suite="accessibility".
    suite_names = re.findall(r'--suite="([a-zA-Z_-]*)"', script_section)
    suite_names.sort()
    return suite_names
def verify_signature(oppia_ml_auth_info):
    """Function that checks if the signature received from the VM is valid.

    Args:
        oppia_ml_auth_info: OppiaMLAuthInfo. Domain object containing
            authentication information.

    Returns:
        bool. Whether the incoming request is valid. False is returned when
        no shared secret is registered for the VM id.
    """
    # Imported locally so that this function does not rely on the module's
    # import block for the constant-time comparison helper.
    import hmac

    secret = None
    for val in config_domain.VMID_SHARED_SECRET_KEY_MAPPING.value:
        if val['vm_id'] == oppia_ml_auth_info.vm_id:
            secret = python_utils.convert_to_bytes(val['shared_secret_key'])
            break
    if secret is None:
        return False

    generated_signature = generate_signature(
        secret, python_utils.convert_to_bytes(oppia_ml_auth_info.message),
        oppia_ml_auth_info.vm_id)
    received_signature = oppia_ml_auth_info.signature

    # The received signature is untrusted input, so it is compared in
    # constant time to avoid leaking how many leading characters matched.
    try:
        return hmac.compare_digest(generated_signature, received_signature)
    except TypeError:
        # compare_digest rejects mixed or non-ASCII text types; fall back to
        # the plain equality check that was used previously.
        return generated_signature == received_signature
def _generate_id(cls, exp_id, exp_version, state_name):
    """Generates a unique ID for the Classifier Exploration Mapping of the
    form [exp_id].[exp_version].[state_name].

    Args:
        exp_id: str. ID of the exploration.
        exp_version: int. The exploration version at the time this
            training job was created.
        state_name: unicode. The name of the state to which the classifier
            belongs.

    Returns:
        str. ID of the new Classifier Exploration Mapping instance.
    """
    id_components = (exp_id, exp_version, state_name)
    return python_utils.convert_to_bytes('%s.%s.%s' % id_components)
def post(self):
    """Runs the deferred task function named in the JSON request body.

    The payload must contain 'fn_identifier' (a key of
    DEFERRED_TASK_FUNCTIONS), 'args' and 'kwargs'.

    Raises:
        Exception. The payload has no fn_identifier attribute.
        Exception. The fn_identifier is not a registered function id.
    """
    payload = json.loads(self.request.body.decode())
    if 'fn_identifier' not in payload:
        raise Exception(
            'This request cannot defer tasks because it does not contain a '
            'function identifier attribute (fn_identifier). Deferred tasks '
            'must contain a function_identifier in the payload.')

    fn_identifier = payload['fn_identifier']
    if fn_identifier not in self.DEFERRED_TASK_FUNCTIONS:
        raise Exception(
            'The function id, %s, is not valid.' %
            python_utils.convert_to_bytes(fn_identifier))

    task_function = self.DEFERRED_TASK_FUNCTIONS[fn_identifier]
    task_function(*payload['args'], **payload['kwargs'])
    self.render_json({})
def get(self, page_context, page_identifier, asset_type, encoded_filename):
    """Returns an asset file.

    Args:
        page_context: str. The context of the page where the asset is
            required.
        page_identifier: str. The unique identifier for the particular
            context. Valid page_context: page_identifier pairs:
                exploration: exp_id
                story: story_id
                topic: topic_id
                skill: skill_id
                subtopic: topic_name of the topic that it is part of.
        asset_type: str. Type of the asset, either image or audio.
        encoded_filename: str. The asset filename. This string is encoded
            in the frontend using encodeURIComponent().

    Raises:
        PageNotFoundException. The server is not running in emulator mode,
            or any error occurred while serving the asset.
    """
    if not constants.EMULATOR_MODE:
        raise self.PageNotFoundException

    try:
        filename = python_utils.urllib_unquote(encoded_filename)
        extension = filename[(filename.rfind('.') + 1):]

        if extension == 'svg':
            content_type = 'image/svg+xml'
        else:
            content_type = '%s/%s' % (asset_type, extension)
        # The header value is converted explicitly because an error occurs
        # in the wsgi library when a unicode value gets used.
        self.response.headers[b'Content-Type'] = (
            python_utils.convert_to_bytes(content_type))

        if page_context not in self._SUPPORTED_PAGE_CONTEXTS:
            raise self.InvalidInputException

        gcs_file_system = fs_domain.GcsFileSystem(
            page_context, page_identifier)
        fs = fs_domain.AbstractFileSystem(gcs_file_system)
        raw = fs.get('%s/%s' % (asset_type, filename))

        cache_control = self.response.cache_control
        cache_control.no_cache = None
        cache_control.public = True
        cache_control.max_age = 600
        self.response.write(raw)
    except Exception as e:
        logging.exception(
            'File not found: %s. %s' % (encoded_filename, e))
        raise self.PageNotFoundException
def run(self):
    """Runs self.func, recording its output or exception and logging timing.

    On success the return value is stored in self.output. On failure the
    exception is stored in self.exception and an ERROR line is logged,
    unless the first exception argument mentions 'KeyboardInterrupt'.
    self.finished is set to True in both cases.
    """
    try:
        self.output = self.func()
        if self.verbose:
            log('LOG %s:' % self.name, show_time=True)
            log(self.output)
            log('----------------------------------------')

        log('FINISHED %s: %.1f secs' %
            (self.name, time.time() - self.start_time), show_time=True)
        self.finished = True
    except Exception as e:
        self.exception = e
        # An exception raised without arguments has an empty args tuple.
        # Indexing it unconditionally would raise IndexError here and leave
        # self.finished unset, so an empty message is used instead.
        first_arg = e.args[0] if e.args else ''
        if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                first_arg):
            log('ERROR %s: %.1f secs' %
                (self.name, time.time() - self.start_time), show_time=True)
        self.finished = True
def setUp(self):
    """Creates a new classifier training job, stores classifier data for it
    and prepares the expected response and a signed request payload.
    """
    super(NextJobHandlerTest, self).setUp()

    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'
    interaction_id = 'TextInput'
    classifier_mapping = feconf.INTERACTION_CLASSIFIER_MAPPING[
        interaction_id]
    self.algorithm_id = classifier_mapping['algorithm_id']
    self.algorithm_version = classifier_mapping['algorithm_version']

    self.training_data = [
        {u'answer_group_index': 1, u'answers': [u'a1', u'a2']},
        {u'answer_group_index': 2, u'answers': [u'a2', u'a3']},
    ]
    self.job_id = classifier_models.ClassifierTrainingJobModel.create(
        self.algorithm_id, interaction_id, self.exp_id, 1,
        datetime.datetime.utcnow(), self.training_data, 'Home',
        feconf.TRAINING_JOB_STATUS_NEW, 1)

    self.classifier_data = text_classifier_pb2.TextClassifierFrozenModel()
    self.classifier_data.model_json = ''
    fs_services.save_classifier_data(
        self.exp_id, self.job_id, self.classifier_data)

    self.expected_response = {
        u'job_id': self.job_id,
        u'training_data': self.training_data,
        u'algorithm_id': self.algorithm_id,
        u'algorithm_version': self.algorithm_version
    }

    # The request payload is signed with the default VM's shared secret.
    vm_id = feconf.DEFAULT_VM_ID
    shared_secret = feconf.DEFAULT_VM_SHARED_SECRET
    message = json.dumps({})
    self.payload = {'vm_id': vm_id, 'message': message}
    self.payload['signature'] = classifier_services.generate_signature(
        python_utils.convert_to_bytes(shared_secret), message, vm_id)
def map(item):
    """Validates the HTML content of an exploration against the CKEditor
    RTE format and reports the invalid strings.

    Args:
        item: *. The exploration model to check. Deleted models are
            skipped.

    Yields:
        tuple(str, list). Either a loading-error message paired with the
        model id, or one entry per error key that has a non-empty value in
        the validation result, paired with that value.
    """
    if item.deleted:
        return

    try:
        exploration = exp_fetchers.get_exploration_from_model(item)
    except Exception as e:
        load_error = python_utils.convert_to_bytes(e)
        yield ('Error %s when loading exploration' % load_error, [item.id])
        return

    html_strings = exploration.get_all_html_content_strings()
    errors_by_key = html_validation_service.validate_rte_format(
        html_strings, feconf.RTE_FORMAT_CKEDITOR)

    for error_key in errors_by_key:
        if not errors_by_key[error_key]:
            continue
        yield (
            '%s Exp Id: %s' % (error_key, item.id),
            errors_by_key[error_key])
def commit(self, filepath, raw_bytes, mimetype=None):
    """Replaces the contents of the file with the given content.

    Args:
        filepath: str. The path to the relevant file within the entity's
            assets folder.
        raw_bytes: str. The content to be stored in the file.
        mimetype: str. The content-type of the file. If mimetype is set to
            'application/octet-stream' then raw_bytes is expected to
            contain binary data. In all other cases, raw_bytes is expected
            to be textual data.
    """
    # Note that textual data needs to be converted to bytes so that it can
    # be stored in a file opened in binary mode. However, it is not
    # required for binary data (i.e. when mimetype is set to
    # 'application/octet-stream').
    if mimetype == 'application/octet-stream':
        file_content = raw_bytes
    else:
        file_content = python_utils.convert_to_bytes(raw_bytes)

    self._check_filepath(filepath)
    self._impl.commit(filepath, file_content, mimetype)
def update_flaky_tests_count(sheet, row_index, current_count):
    """Updates the flaky tests count in the google sheet.

    Nothing is done when the FLAKY_E2E_TEST_SHEET_ID environment variable
    is not set.

    Args:
        sheet: googleapiclient.discovery.Resource. The spreadsheet object.
        row_index: int. The index of the row to update in the sheet.
        current_count: int. The current count of this flake in the sheet.
    """
    sheet_id = os.getenv('FLAKY_E2E_TEST_SHEET_ID')
    if sheet_id is None:
        return

    # The incremented count is written to column F of the 'Log' tab, five
    # rows below the given row index.
    body = {'values': [[current_count + 1]]}
    update_request = sheet.values().update(
        spreadsheetId=sheet_id,
        range='Log!F' + python_utils.convert_to_bytes(row_index + 5),
        valueInputOption='USER_ENTERED',
        body=body)
    update_request.execute()
    python_utils.PRINT('** NOTE: Updated sheet for first failing test **')
def get_e2e_suite_names_from_jobs_travis_yml_file():
    """Extracts the test suites from env/jobs section from
    the .travis.yml file.

    Returns:
        list(str). An alphabetically-sorted list of names of test suites
        from the jobs section in the .travis.yml file.
    """
    travis_config = read_and_parse_travis_yml_file()
    jobs_section = python_utils.convert_to_bytes(
        travis_config['env']['jobs'])
    # Suite names are taken from job entries of the form
    # RUN_E2E_TESTS_ACCESSIBILITY=true.
    job_names = re.findall(r'RUN_E2E_TESTS_([A-Z_]*)=', jobs_section)
    return sorted(
        utils.snake_case_to_camel_case(job_name.lower())
        for job_name in job_names)
def map(model_instance):
    """Implements a map function which defers to a pre-defined validator.

    Args:
        model_instance: *. The model to validate.

    Yields:
        tuple(str, *). One entry per validator error key when validation
        fails, otherwise a single success entry with a count of 1.
    """
    model_name = model_instance.__class__.__name__
    validator_cls_name = '%sValidator' % model_name
    # Module name for models is of the form:
    # 'core.storage.<model-type>.gae_models'.
    # Module name for validators is of the form:
    # 'core.domain.<model-type>_validators'.
    # So, the model type is extracted from the model's module name to
    # obtain the module name for validators. No extra test is required to
    # verify that models and validators follow this naming, since the
    # validators test fails on the import performed here if they do not.
    model_type = model_instance.__module__.split('.')[2]
    # TODO(#10415): This try catch is required until all the validators are
    # refactored. Remove the try catch block once #10415 is fixed.
    try:
        validator_module = importlib.import_module(
            'core.domain.%s_validators' % model_type)
    except ImportError:
        validator_module = importlib.import_module(
            'core.domain.prod_validators')
    validator = getattr(validator_module, validator_cls_name)

    if model_instance.deleted:
        validator.validate_deleted(model_instance)
    else:
        validator.validate(model_instance)

    if len(validator.errors) == 0:
        yield ('%s %s' % (VALIDATION_STATUS_SUCCESS, model_name), 1)
        return

    for error_key, error_list in validator.errors.items():
        # Duplicate messages are collapsed before they are joined.
        joined_errors = ','.join(set(error_list))
        error_message = joined_errors.encode(encoding='utf-8')
        yield (
            'failed validation check for %s of %s' % (
                error_key, model_name),
            python_utils.convert_to_bytes(error_message)
        )
def convert_png_binary_to_data_url(content: Union[str, bytes]) -> str:
    """Converts a PNG image string (represented by 'content') to a data URL.

    Args:
        content: str|bytes. PNG binary file content.

    Returns:
        str. Data URL created from the binary content of the PNG.

    Raises:
        Exception. The given binary string does not represent a PNG image.
    """
    # Unicode input is accepted, but imghdr.what(file, h) expects 'h' to be
    # of type bytes, so the content is converted first.
    content = python_utils.convert_to_bytes(content)
    if imghdr.what(None, h=content) != 'png':
        raise Exception('The given string does not represent a PNG image.')

    encoded_content = base64.b64encode(content)
    quoted_content = python_utils.url_quote(encoded_content) # type: ignore[no-untyped-call]
    return '%s%s' % (PNG_DATA_URL_PREFIX, quoted_content)
def run(self):
    """Runs self.func, recording its output or exception and logging timing.

    On success the return value is stored in self.output. On failure the
    exception and its formatted traceback are stored in self.exception and
    self.stacktrace, and the error is logged unless the first exception
    argument mentions 'KeyboardInterrupt'. The semaphore is always released
    and self.finished is always set to True.
    """
    try:
        self.output = self.func()
        if self.verbose:
            log('LOG %s:' % self.name, show_time=True)
            log(self.output)
            log('----------------------------------------')
        log('FINISHED %s: %.1f secs' %
            (self.name, time.time() - self.start_time), show_time=True)
    except Exception as e:
        self.exception = e
        self.stacktrace = traceback.format_exc()
        # An exception raised without arguments has an empty args tuple.
        # Indexing it unconditionally would raise IndexError from this
        # handler and the error would never be logged.
        first_arg = e.args[0] if e.args else ''
        if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                first_arg):
            log(e)
            log('ERROR %s: %.1f secs' %
                (self.name, time.time() - self.start_time), show_time=True)
    finally:
        self.semaphore.release()
        self.finished = True
def verify_signature(message, vm_id, received_signature):
    """Function that checks if the signature received from the VM is valid.

    Args:
        message: dict. The message payload data.
        vm_id: str. The ID of the VM instance.
        received_signature: str. The signature received from the VM.

    Returns:
        bool. Whether the incoming request is valid. False is returned when
        no shared secret is registered for the VM id.
    """
    # Imported locally so that this function does not rely on the module's
    # import block for the constant-time comparison helper.
    import hmac

    secret = None
    for val in config_domain.VMID_SHARED_SECRET_KEY_MAPPING.value:
        if val['vm_id'] == vm_id:
            secret = python_utils.convert_to_bytes(val['shared_secret_key'])
            break
    if secret is None:
        return False

    generated_signature = generate_signature(secret, message)

    # The received signature is untrusted input, so it is compared in
    # constant time to avoid leaking how many leading characters matched.
    try:
        return hmac.compare_digest(generated_signature, received_signature)
    except TypeError:
        # compare_digest rejects mixed or non-ASCII text types; fall back to
        # the plain equality check that was used previously.
        return generated_signature == received_signature
def get_user_id_from_email(email):
    """Given an email address, returns a user id.

    The id is obtained by storing a temporary model holding a UserProperty
    for the email and reading the user id back from the stored entity.

    Args:
        email: str. The email address to look up.

    Returns:
        str|None. The user id, or None if the email address does not
        correspond to a valid user id.
    """

    class _FakeUser(ndb.Model):
        """A fake user class."""

        _use_memcache = False
        _use_cache = False
        user = ndb.UserProperty(required=True)

    try:
        user_for_email = users.User(email)
    except users.UserNotFoundError:
        logging.error(
            'The email address %s does not correspond to a valid user_id'
            % email)
        return None

    stored_key = _FakeUser(id=email, user=user_for_email).put()
    stored_entity = _FakeUser.get_by_id(stored_key.id())
    user_id = stored_entity.user.user_id()
    if not user_id:
        return None
    return python_utils.convert_to_bytes(user_id)
def run(self):
    """Runs self.func, recording its results or exception and logging timing.

    On success the return value is stored in self.task_results and, in
    verbose mode, each task result is logged (as a lint report when
    self.report_enabled is set, otherwise as backend test output). On
    failure the exception and its formatted traceback are stored and the
    error is logged unless the first exception argument mentions
    'KeyboardInterrupt'. The semaphore is always released and self.finished
    is always set to True.
    """
    try:
        self.task_results = self.func()
        if self.verbose:
            for task_result in self.task_results:
                # The following section will print the output of the lint
                # checks.
                if self.report_enabled:
                    log(
                        'Report from %s check\n'
                        '----------------------------------------\n'
                        '%s' % (task_result.name, '\n'.join(
                            task_result.get_report())), show_time=True)
                # The following section will print the output of backend
                # tests.
                else:
                    log(
                        'LOG %s:\n%s'
                        '----------------------------------------' %
                        (self.name, task_result.messages[0]),
                        show_time=True)
        log(
            'FINISHED %s: %.1f secs' % (
                self.name, time.time() - self.start_time), show_time=True)
    except Exception as e:
        self.exception = e
        self.stacktrace = traceback.format_exc()
        # An exception raised without arguments has an empty args tuple.
        # Indexing it unconditionally would raise IndexError from this
        # handler and the error would never be logged.
        first_arg = e.args[0] if e.args else ''
        if 'KeyboardInterrupt' not in python_utils.convert_to_bytes(
                first_arg):
            log(e)
            log(
                'ERROR %s: %.1f secs' %
                (self.name, time.time() - self.start_time), show_time=True)
    finally:
        self.semaphore.release()
        self.finished = True
def get_exploration_by_id(exploration_id, strict=True, version=None):
    """Returns an Exploration domain object.

    The cache is consulted first; on a miss the exploration is loaded from
    storage and written back to the cache.

    Args:
        exploration_id: str. The id of the exploration to be returned.
        strict: bool. Whether to fail noisily if no exploration with a
            given id exists.
        version: int or None. The version of the exploration to be
            returned. If None, the latest version of the exploration is
            returned.

    Returns:
        Exploration|None. The domain object corresponding to the given
        exploration, or None if no exploration model was found.
    """
    # Versioned lookups are cached under a per-version sub-namespace.
    if version:
        sub_namespace = python_utils.convert_to_bytes(version)
    else:
        sub_namespace = None

    cache_hits = caching_services.get_multi(
        caching_services.CACHE_NAMESPACE_EXPLORATION,
        sub_namespace,
        [exploration_id])
    cached_exploration = cache_hits.get(exploration_id)
    if cached_exploration is not None:
        return cached_exploration

    exploration_model = exp_models.ExplorationModel.get(
        exploration_id, strict=strict, version=version)
    if not exploration_model:
        return None

    exploration = get_exploration_from_model(exploration_model)
    caching_services.set_multi(
        caching_services.CACHE_NAMESPACE_EXPLORATION,
        sub_namespace,
        {exploration_id: exploration})
    return exploration
def main(args=None):
    """Run the tests.

    Sets up the environment, builds one task per test target, runs the
    tasks concurrently, prints a summary and optionally checks the coverage
    report.

    Args:
        args: list(str)|None. The command line arguments to parse. They are
            handed to the parser unchanged.

    Raises:
        Exception. A required directory or the coverage tool is missing.
        Exception. The test_path/test_target arguments are inconsistent or
            use the wrong delimiter.
        Exception. No tests were run, task execution failed, or some tests
            reported errors or failures.
        Exception. The coverage report could not be parsed or the total
            coverage is not 100%.
    """
    parsed_args = _PARSER.parse_args(args=args)

    setup.main(args=[])
    setup_gae.main(args=[])

    for directory in DIRS_TO_ADD_TO_SYS_PATH:
        if not os.path.exists(os.path.dirname(directory)):
            raise Exception('Directory %s does not exist.' % directory)

        # The directories should only be inserted starting at index 1. See
        # https://stackoverflow.com/a/10095099 and
        # https://stackoverflow.com/q/10095037 for more details.
        sys.path.insert(1, directory)

    import dev_appserver
    dev_appserver.fix_sys_path()

    if parsed_args.generate_coverage_report:
        python_utils.PRINT('Checking whether coverage is installed in %s'
                           % common.OPPIA_TOOLS_DIR)
        if not os.path.exists(
                os.path.join(common.OPPIA_TOOLS_DIR,
                             'coverage-%s' % common.COVERAGE_VERSION)):
            raise Exception('Coverage is not installed, please run the start '
                            'script.')

        pythonpath_components = [COVERAGE_DIR]
        if os.environ.get('PYTHONPATH'):
            pythonpath_components.append(os.environ.get('PYTHONPATH'))
        os.environ['PYTHONPATH'] = os.pathsep.join(pythonpath_components)

    if parsed_args.test_target and parsed_args.test_path:
        raise Exception('At most one of test_path and test_target '
                        'should be specified.')
    if parsed_args.test_path and '.' in parsed_args.test_path:
        raise Exception('The delimiter in test_path should be a slash (/)')
    if parsed_args.test_target and '/' in parsed_args.test_target:
        raise Exception('The delimiter in test_target should be a dot (.)')

    if parsed_args.test_target:
        if '_test' in parsed_args.test_target:
            all_test_targets = [parsed_args.test_target]
        else:
            python_utils.PRINT('')
            python_utils.PRINT(
                '---------------------------------------------------------')
            python_utils.PRINT(
                'WARNING : test_target flag should point to the test file.')
            python_utils.PRINT(
                '---------------------------------------------------------')
            python_utils.PRINT('')
            time.sleep(3)
            python_utils.PRINT('Redirecting to its corresponding test file...')
            all_test_targets = [parsed_args.test_target + '_test']
    else:
        include_load_tests = not parsed_args.exclude_load_tests
        all_test_targets = _get_all_test_targets(
            test_path=parsed_args.test_path,
            include_load_tests=include_load_tests)

    # Prepare tasks.
    max_concurrent_runs = 25
    concurrent_count = min(multiprocessing.cpu_count(), max_concurrent_runs)
    semaphore = threading.Semaphore(concurrent_count)

    task_to_taskspec = {}
    tasks = []
    for test_target in all_test_targets:
        test = TestingTaskSpec(test_target,
                               parsed_args.generate_coverage_report)
        task = concurrent_task_utils.create_task(test.run,
                                                 parsed_args.verbose,
                                                 semaphore,
                                                 name=test_target)
        task_to_taskspec[task] = test
        tasks.append(task)

    task_execution_failed = False
    try:
        concurrent_task_utils.execute_tasks(tasks, semaphore)
    except Exception:
        # The failure is remembered and raised only after the summary has
        # been printed.
        task_execution_failed = True

    for task in tasks:
        if task.exception:
            concurrent_task_utils.log(
                python_utils.convert_to_bytes(task.exception.args[0]))

    python_utils.PRINT('')
    python_utils.PRINT('+------------------+')
    python_utils.PRINT('| SUMMARY OF TESTS |')
    python_utils.PRINT('+------------------+')
    python_utils.PRINT('')

    # Check we ran all tests as expected.
    total_count = 0
    total_errors = 0
    total_failures = 0
    for task in tasks:
        spec = task_to_taskspec[task]

        if not task.finished:
            python_utils.PRINT('CANCELED %s' % spec.test_target)
            test_count = 0
        elif (task.exception and
              'No tests were run' in python_utils.convert_to_bytes(
                  task.exception.args[0])):
            python_utils.PRINT('ERROR %s: No tests found.' % spec.test_target)
            test_count = 0
        elif task.exception:
            exc_str = python_utils.convert_to_bytes(task.exception.args[0])
            python_utils.PRINT(exc_str[exc_str.find('='):exc_str.rfind('-')])

            tests_failed_regex_match = re.search(
                r'Test suite failed: ([0-9]+) tests run, ([0-9]+) errors, '
                '([0-9]+) failures', exc_str)

            try:
                test_count = int(tests_failed_regex_match.group(1))
                errors = int(tests_failed_regex_match.group(2))
                failures = int(tests_failed_regex_match.group(3))
                total_errors += errors
                total_failures += failures
                python_utils.PRINT('FAILED %s: %s errors, %s failures' %
                                   (spec.test_target, errors, failures))
            except AttributeError:
                # There was an internal error, and the tests did not run (The
                # error message did not match `tests_failed_regex_match`).
                test_count = 0
                total_errors += 1
                python_utils.PRINT('')
                python_utils.PRINT(
                    '------------------------------------------------------')
                python_utils.PRINT(' WARNING: FAILED TO RUN %s' %
                                   spec.test_target)
                python_utils.PRINT('')
                python_utils.PRINT(
                    ' This is most likely due to an import error.')
                python_utils.PRINT(
                    '------------------------------------------------------')
        else:
            # Start from zero so that an unparsable task output does not
            # leave test_count unbound (first task) or holding the count of
            # the previous task, which would then be added a second time.
            test_count = 0
            try:
                tests_run_regex_match = re.search(
                    r'Ran ([0-9]+) tests? in ([0-9\.]+)s', task.output)
                test_count = int(tests_run_regex_match.group(1))
                test_time = float(tests_run_regex_match.group(2))
                python_utils.PRINT('SUCCESS %s: %d tests (%.1f secs)' %
                                   (spec.test_target, test_count, test_time))
            except Exception:
                python_utils.PRINT('An unexpected error occurred. '
                                   'Task output:\n%s' % task.output)

        total_count += test_count

    python_utils.PRINT('')
    if total_count == 0:
        raise Exception('WARNING: No tests were run.')

    python_utils.PRINT('Ran %s test%s in %s test class%s.' %
                       (total_count, '' if total_count == 1 else 's',
                        len(tasks), '' if len(tasks) == 1 else 'es'))

    if total_errors or total_failures:
        python_utils.PRINT('(%s ERRORS, %s FAILURES)' %
                           (total_errors, total_failures))
    else:
        python_utils.PRINT('All tests passed.')

    if task_execution_failed:
        raise Exception('Task execution failed.')
    elif total_errors or total_failures:
        raise Exception('%s errors, %s failures' %
                        (total_errors, total_failures))

    if parsed_args.generate_coverage_report:
        subprocess.check_call(
            [sys.executable, COVERAGE_MODULE_PATH, 'combine'])
        process = subprocess.Popen([
            sys.executable, COVERAGE_MODULE_PATH, 'report',
            '--omit="%s*","third_party/*","/usr/share/*"' %
            common.OPPIA_TOOLS_DIR, '--show-missing'
        ], stdout=subprocess.PIPE)
        report_stdout, _ = process.communicate()
        python_utils.PRINT(report_stdout)

        coverage_result = re.search(
            r'TOTAL\s+(\d+)\s+(\d+)\s+(?P<total>\d+)%\s+', report_stdout)
        # re.search returns None when the report has no TOTAL line; fail
        # with a clear message instead of an AttributeError.
        if coverage_result is None:
            raise Exception(
                'Could not find the total coverage in the coverage report.')
        if coverage_result.group('total') != '100':
            raise Exception('Backend test coverage is not 100%')

    python_utils.PRINT('')
    python_utils.PRINT('Done!')
def setUp(self):
    """Creates an exploration with a classifier training job, marks the job
    as pending and prepares signed payloads for the handler tests.
    """
    super(TrainedClassifierHandlerTests, self).setUp()

    self.exp_id = 'exp_id1'
    self.title = 'Testing Classifier storing'
    self.category = 'Test'
    classifier_yaml_path = os.path.join(
        feconf.TESTS_DATA_DIR, 'string_classifier_test.yaml')
    with python_utils.open_file(classifier_yaml_path, 'r') as yaml_file:
        self.yaml_content = yaml_file.read()
    self.signup(self.CURRICULUM_ADMIN_EMAIL, self.CURRICULUM_ADMIN_USERNAME)
    self.signup('*****@*****.**', 'mod')

    no_assets = []
    with self.swap(feconf, 'ENABLE_ML_CLASSIFIERS', True):
        exp_services.save_new_exploration_from_yaml_and_assets(
            feconf.SYSTEM_COMMITTER_ID, self.yaml_content, self.exp_id,
            no_assets)
    self.exploration = exp_fetchers.get_exploration_by_id(self.exp_id)

    self.algorithm_id = feconf.INTERACTION_CLASSIFIER_MAPPING[
        self.exploration.states['Home'].interaction.id]['algorithm_id']
    self.algorithm_version = feconf.INTERACTION_CLASSIFIER_MAPPING[
        self.exploration.states['Home'].interaction.id][
            'algorithm_version']

    self.classifier_data = {
        '_alpha': 0.1,
        '_beta': 0.001,
        '_prediction_threshold': 0.5,
        '_training_iterations': 25,
        '_prediction_iterations': 5,
        '_num_labels': 10,
        '_num_docs': 12,
        '_num_words': 20,
        '_label_to_id': {'text': 1},
        '_word_to_id': {'hello': 2},
        '_w_dp': [],
        '_b_dl': [],
        '_l_dp': [],
        '_c_dl': [],
        '_c_lw': [],
        '_c_l': [],
    }

    training_job = classifier_services.get_classifier_training_job(
        self.exp_id, self.exploration.version, 'Home', self.algorithm_id)
    self.assertIsNotNone(training_job)
    self.job_id = training_job.job_id

    # TODO(pranavsid98): Replace the three commands below with
    # mark_training_job_pending after Giritheja's PR gets merged.
    training_job_model = classifier_models.ClassifierTrainingJobModel.get(
        self.job_id, strict=False)
    training_job_model.status = feconf.TRAINING_JOB_STATUS_PENDING
    training_job_model.update_timestamps()
    training_job_model.put()

    self.job_result = (
        training_job_response_payload_pb2.
        TrainingJobResponsePayload.JobResult())
    self.job_result.job_id = self.job_id

    frozen_model = text_classifier_pb2.TextClassifierFrozenModel()
    frozen_model.model_json = json.dumps(self.classifier_data)
    self.job_result.text_classifier.CopyFrom(frozen_model)

    self.payload_proto = (
        training_job_response_payload_pb2.TrainingJobResponsePayload())
    self.payload_proto.job_result.CopyFrom(self.job_result)
    self.payload_proto.vm_id = feconf.DEFAULT_VM_ID
    self.secret = feconf.DEFAULT_VM_SHARED_SECRET
    # The job result is signed in its serialized form.
    self.payload_proto.signature = classifier_services.generate_signature(
        python_utils.convert_to_bytes(self.secret),
        python_utils.convert_to_bytes(
            self.payload_proto.job_result.SerializeToString()),
        self.payload_proto.vm_id)

    next_job_request = {
        'vm_id': feconf.DEFAULT_VM_ID,
        'message': json.dumps({})
    }
    self.payload_for_fetching_next_job_request = next_job_request
    next_job_request['signature'] = classifier_services.generate_signature(
        python_utils.convert_to_bytes(self.secret),
        python_utils.convert_to_bytes(next_job_request['message']),
        next_job_request['vm_id'])
def convert_to_textangular(html_data):
    """Converts an HTML string to the TextAngular supported format.

    The html is first passed through convert_tag_contents_to_rte_format
    (with this function as the conversion callback), then parsed with
    BeautifulSoup. Tags are renamed, replaced or unwrapped according to the
    'RTE_TYPE_TEXTANGULAR' entry of feconf.RTE_CONTENT_SPEC, and afterwards
    the tree is re-parsed and re-nested so that each tag sits inside an
    allowed parent.

    Args:
        html_data: str. HTML string to be converted.

    Returns:
        str. The converted HTML string. Empty input is returned unchanged.
    """
    if not html_data:
        return html_data

    # <br> is replaced with <br/> before conversion because BeautifulSoup
    # in some cases adds </br> closing tag and br is reported as parent
    # of other tags which produces issues in migration.
    html_data = html_data.replace('<br>', '<br/>')

    # To convert the rich text content within tabs and collapsible components
    # to valid TextAngular format. If there is no tabs or collapsible component
    # convert_tag_contents_to_rte_format will make no change to html_data.
    html_data = convert_tag_contents_to_rte_format(
        html_data, convert_to_textangular)

    soup = bs4.BeautifulSoup(html_data.encode(encoding='utf-8'), 'html.parser')

    allowed_tag_list = (
        feconf.RTE_CONTENT_SPEC[
            'RTE_TYPE_TEXTANGULAR']['ALLOWED_TAG_LIST'])
    allowed_parent_list = (
        feconf.RTE_CONTENT_SPEC[
            'RTE_TYPE_TEXTANGULAR']['ALLOWED_PARENT_LIST'])

    # The td tag will be unwrapped and tr tag will be replaced with p tag.
    # So if td is parent of blockquote after migration blockquote should
    # be parent of the p tag to get the almost same appearance. p cannot
    # remain parent of blockquote since that is not allowed in TextAngular.
    # If blockquote is wrapped in p we need to unwrap the p but here
    # we need to make blockquote the parent of p. Since this cannot
    # be distinguished after migration to p, this part is checked
    # before migration.
    for blockquote in soup.findAll(name='blockquote'):
        if blockquote.parent.name == 'td':
            blockquote.parent.parent.wrap(soup.new_tag('blockquote'))
            blockquote.unwrap()

    # If p tags are left within a td tag, the contents of a table row
    # in final output will span to multiple lines instead of all
    # items being in a single line. So, any p tag within
    # td tag is unwrapped.
    for p in soup.findAll(name='p'):
        if p.parent.name == 'td':
            p.unwrap()

    # To remove all tags except those in allowed tag list.
    all_tags = soup.findAll()
    for tag in all_tags:
        if tag.name == 'strong':
            tag.name = 'b'
        elif tag.name == 'em':
            tag.name = 'i'
        # Current rte does not support horizontal rule, the closest
        # replacement of a horizontal rule is a line break to obtain
        # the same appearance.
        elif tag.name == 'hr':
            tag.name = 'br'
        # 'a' tag is to be replaced with oppia-noninteractive-link.
        # For this the attributes and text within a tag is used to
        # create new link tag which is wrapped as parent of a and then
        # a tag is removed.
        # In case where there is no href attribute or no text within the
        # a tag, the tag is simply removed.
        elif tag.name == 'a':
            replace_with_link = True
            if tag.has_attr('href') and tag.get_text():
                children = tag.findChildren()
                for child in children:
                    if child.name == 'oppia-noninteractive-link':
                        tag.unwrap()
                        replace_with_link = False
                        # The anchor is detached from the tree once it has
                        # been unwrapped, so it must not be unwrapped again
                        # for any further link descendants.
                        break
                if replace_with_link:
                    link = soup.new_tag('oppia-noninteractive-link')
                    url = tag['href']
                    text = tag.get_text()
                    link['url-with-value'] = escape_html(json.dumps(url))
                    link['text-with-value'] = escape_html(json.dumps(text))
                    tag.wrap(link)
                    # If any part of text in a tag is wrapped in b or i tag
                    # link tag is also wrapped in those tags to maintain
                    # almost similar appearance.
                    count_of_b_parent = 0
                    count_of_i_parent = 0
                    for child in children:
                        if child.name == 'b' and not count_of_b_parent:
                            link.wrap(soup.new_tag('b'))
                            count_of_b_parent = 1
                        if child.name == 'i' and not count_of_i_parent:
                            link.wrap(soup.new_tag('i'))
                            count_of_i_parent = 1
                    tag.extract()
            else:
                tag.unwrap()
        # To maintain the appearance of table, tab is added after
        # each element in row. In one of the cases the elements were
        # p tags with some text and line breaks. In such case td.string
        # is None and there is no need to add tabs since linebreak is
        # already present.
        # NOTE(review): the separator inserted below is a single space in
        # this copy of the source -- confirm whether a tab was intended.
        elif tag.name == 'td' and tag.next_sibling:
            tag.insert_after(' ')
            tag.unwrap()
        # Divs and table rows are both replaced with p tag
        # to maintain almost same appearance.
        elif tag.name == 'div' or tag.name == 'tr':
            tag.name = 'p'
        # All other invalid tags are simply removed.
        elif tag.name not in allowed_tag_list:
            tag.unwrap()

    # Removal of tags can break the soup into parts which are continuous
    # and not wrapped in any tag. This part recombines the continuous
    # parts not wrapped in any tag.
    soup = bs4.BeautifulSoup(
        python_utils.convert_to_bytes(soup), 'html.parser')

    # Ensure that blockquote tag is wrapped in an allowed parent.
    for blockquote in soup.findAll(name='blockquote'):
        while blockquote.parent.name not in allowed_parent_list['blockquote']:
            blockquote.parent.unwrap()

    # Ensure that pre tag is not wrapped p tags.
    for pre in soup.findAll(name='pre'):
        while pre.parent.name == 'p':
            pre.parent.unwrap()

    # Ensure that ol and ul are not wrapped in p tags.
    for tag_name in ['ol', 'ul']:
        for tag in soup.findAll(name=tag_name):
            while tag.parent.name == 'p':
                tag.parent.unwrap()

    # Ensure that br tag is wrapped in an allowed parent.
    for br in soup.findAll(name='br'):
        if br.parent.name == 'pre':
            br.insert_after('\n')
            br.unwrap()
        elif br.parent.name not in allowed_parent_list['br']:
            wrap_with_siblings(br, soup.new_tag('p'))

    # Ensure that b and i tags are wrapped in an allowed parent.
    for tag_name in ['b', 'i']:
        for tag in soup.findAll(name=tag_name):
            # A b/i tag inside a link is hoisted so that it wraps the link.
            if tag.parent.name == 'oppia-noninteractive-link':
                tag.parent.wrap(soup.new_tag(tag_name))
                parent = tag.parent.parent
                tag.unwrap()
                tag = parent
            # A tag nested directly in a tag of the same name is collapsed.
            if tag.parent.name == tag_name:
                parent = tag.parent
                tag.unwrap()
                tag = parent
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure that oppia inline components are wrapped in an allowed parent.
    for tag_name in INLINE_COMPONENT_TAG_NAMES:
        for tag in soup.findAll(name=tag_name):
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure oppia link component is not a child of another link component.
    for link in soup.findAll(name='oppia-noninteractive-link'):
        if link.parent.name == 'oppia-noninteractive-link':
            link.unwrap()

    # Ensure that oppia block components are wrapped in an allowed parent.
    for tag_name in BLOCK_COMPONENT_TAG_NAMES:
        for tag in soup.findAll(name=tag_name):
            if tag.parent.name in ['blockquote', '[document]']:
                wrap_with_siblings(tag, soup.new_tag('p'))

    # Ensure that every content in html is wrapped in a tag.
    for content in soup.contents:
        if not content.name:
            content.wrap(soup.new_tag('p'))

    # Ensure that p tag has a valid parent.
    for p in soup.findAll(name='p'):
        if p.parent.name != 'p' and (
                p.parent.name not in allowed_parent_list['p']):
            p.parent.unwrap()

    # Ensure that p tag is not wrapped in p tag.
    for p in soup.findAll(name='p'):
        if p.parent.name == 'p':
            child_tags = p.parent.contents
            index = 0
            while index < len(child_tags):
                current_tag = child_tags[index]

                # If the current tag is not a paragraph tag, wrap it and all
                # consecutive non-p tags after it into a single p-tag.
                new_p = soup.new_tag('p')
                while current_tag.name != 'p':
                    current_tag = current_tag.wrap(new_p)
                    index = child_tags.index(current_tag) + 1
                    if index >= len(child_tags):
                        break
                    current_tag = child_tags[index]

                index += 1
            p.parent.unwrap()

    # Beautiful soup automatically changes some <br> to <br/>,
    # so it has to be replaced directly in the string.
    # Also, when any html string with <br/> is stored in exploration
    # html strings they are stored as <br>. Since both of these
    # should match and <br> and <br/> have same working,
    # so the tag has to be replaced in this way.
    return python_utils.STR(soup).replace('<br/>', '<br>')