def test_missing_several_keys(self) -> None:
    """Missing several translations on ensure_translate."""

    with self.assertRaises(KeyError) as error:
        with translation.Translator() as translator:
            translator.ensure_translate('country', 'en_UK')
            translator.ensure_translate('sacrebleu', 'en_UK')
            translator.ensure_translate('fully missing key', 'en_UK')
    self.assertIn('sacrebleu', str(error.exception))
    self.assertIn('fully missing key', str(error.exception))
def test_translate_fields(self) -> None:
    """Translate fields of a mapping."""

    with translation.Translator() as translator:
        translated = translator.ensure_translate_fields(
            {'a': 'country', 'b': 'untranslated', 'c': 'my language'},
            locale='en_UK', fields=('a', 'c', 'unknown'))
    self.assertEqual({'a': 'United Kingdom', 'c': 'English'}, translated)
def make_dicts(
        *,
        soc_definitions_xls: str,
        hires_csv: str,
        job_seekers_csv: str,
        states_txt: str,
        application_mode_csv: str,
        soc_structure_xls: str,
        soc_fap_crosswalk_airtable: str,
        brookings_automation_risk_json: str,
        occupation_requirements_json: str,
        skills_for_future_airtable: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Prepare job info for MongoDB."""

    job_groups = usa_cleaned_data.us_soc2010_job_groups(filename=soc_definitions_xls)
    job_groups['_id'] = job_groups['romeId']

    # Domains.
    structure = pd.read_excel(soc_structure_xls, skiprows=11)\
        .dropna(subset=['Major Group'])\
        .drop(['Minor Group', 'Broad Group', 'Detailed Occupation'], axis='columns')
    structure['Major Group'] = structure['Major Group'].str[:3]
    major_groups = structure.set_index('Major Group').squeeze()
    job_groups['domain'] = job_groups['romeId'].str[:3].map(major_groups)
    job_groups.domain.fillna('', inplace=True)

    local_stats = usa_cleaned_data.usa_compute_market_score(
        hires_csv=hires_csv, job_seekers_csv=job_seekers_csv, job_groups=job_groups)

    # Application modes.
    modes = cleaned_data.fap_application_modes(filename=application_mode_csv)
    fap_prefixes = pd.DataFrame({
        'fap_code': modes.index,
        'fap_prefix': modes.index.str[:3],
    })
    soc_to_fap_prefix = _load_crosswalk_airtable(*soc_fap_crosswalk_airtable.split(':'))
    soc_to_fap = soc_to_fap_prefix.join(
        fap_prefixes.set_index('fap_prefix'), on='fap_prefix', how='inner')
    soc_to_fap['modes'] = soc_to_fap.fap_code.map(modes)
    job_groups['applicationModes'] = soc_to_fap.groupby('soc_2018').apply(
        lambda faps: {
            str(fap.fap_code): fap.modes
            for fap in faps.itertuples()
            if fap.modes
        })
    # Fill NaN with empty {}.
    job_groups['applicationModes'] = job_groups.applicationModes.apply(
        lambda s: s if isinstance(s, dict) else {})

    # Add best counties.
    # TODO(cyrille): Rename field to a more generic area name.
    job_groups['departementScores'] = market_score_derivatives.get_less_stressful_districts(
        local_stats, max_districts=20)
    # Fill NaN with empty [].
    job_groups['departementScores'] = job_groups.departementScores.apply(
        lambda s: s if isinstance(s, list) else [])

    job_groups['automationRisk'] = usa_cleaned_data.us_automation_brookings(
        filename=brookings_automation_risk_json,
        soc_filename=soc_definitions_xls).mul(100).round(0).astype(int)
    # Mark 0 values as 1, as 0 means undefined.
    job_groups.loc[job_groups['automationRisk'] == 0, 'automationRisk'] = 1
    job_groups['automationRisk'].fillna(0, inplace=True)

    # Data per state.
    states = pd.read_csv(states_txt, delimiter='|')
    local_stats['state'] = local_stats['district_id'].astype(int).div(1000).astype(int)\
        .map(states.set_index('STATE').STUSAB)
    state_scores = local_stats.dropna(subset=['market_score'])\
        .groupby(['state', 'job_group']).market_score.median().reset_index()
    job_groups['admin1AreaScores'] = state_scores.groupby('job_group').apply(
        lambda stat_scores: [{
            'areaId': row.state,
            'localStats': {'imt': {'yearlyAvgOffersPer10Candidates': round(row.market_score)}},
        } for row in stat_scores.itertuples()])
    # Fill NaN with empty [].
    job_groups['admin1AreaScores'] = job_groups.admin1AreaScores.apply(
        lambda s: s if isinstance(s, list) else [])

    job_groups['inDomain'] = 'in your industry'

    # Add occupation requirements from json file.
    with open(occupation_requirements_json, encoding='utf-8') as job_requirements_file:
        job_requirements_list = json.load(job_requirements_file)
    job_requirements_dict = {
        job_requirement.pop('_id'): job_requirement
        for job_requirement in job_requirements_list
    }
    job_groups['requirements'] = job_groups.index.map(job_requirements_dict)
    # Replace NaN by empty dicts.
    job_groups['requirements'] = job_groups.requirements.apply(
        lambda r: r if isinstance(r, dict) else {})

    # SkillsForFuture
    skills_for_future_by_rome = airtable_to_protos.load_items_from_prefix(
        'Skill', job_groups.index, skills_for_future_airtable, 'soc_prefixes_us')
    if skills_for_future_by_rome:
        with translation.Translator() as translator:
            translated_skills_for_future_by_rome = {
                rome_id: [
                    skill | translator.ensure_translate_fields(
                        skill, locale='en', fields=_SKILL_18N_FIELDS)
                    for skill in skills
                ]
                for rome_id, skills in skills_for_future_by_rome.items()
            }
        job_groups['skillsForFuture'] = job_groups.index.map(
            translated_skills_for_future_by_rome)

    return typing.cast(list[dict[str, Any]], job_groups.to_dict('records'))
def make_dicts(
        *,
        postings_csv: str,
        occupations_csv: str,
        jobs_xls: str,
        soc2010_js: str,
        career_jumps_csv: str,
        automation_xls: str,
        info_by_prefix_airtable: str,
        occupation_requirements_json: str,
        skills_for_future_airtable: Optional[str] = None,
) -> list[dict[str, Any]]:
    """Prepare job info for MongoDB."""

    job_groups = uk_cleaned_data.uk_soc2010_job_groups(filename=jobs_xls)\
        .reset_index().rename(columns={'Unit_Group': 'romeId'})
    job_groups['_id'] = job_groups['romeId']
    job_groups.set_index('_id', inplace=True)

    descriptions = uk_cleaned_data.uk_soc2010_group_descriptions(filename=soc2010_js)
    job_groups['description'] = descriptions['description']
    job_groups['samples'] = descriptions['jobs'].apply(
        lambda job_names: [{'name': name} for name in job_names])

    domains = uk_cleaned_data.uk_soc2010_job_groups(filename=jobs_xls, level='Major_Group')\
        .squeeze().str.capitalize()
    job_groups['domain'] = job_groups.romeId.str[:1].map(domains)

    local_stats = local_diagnosis.compute_market_score(
        postings_csv=postings_csv, occupations_csv=occupations_csv)

    job_groups['automationRisk'] = _get_automation_risk(automation_xls).mul(100).round(0)
    # Mark 0 values as 1, as 0 means undefined.
    job_groups.loc[job_groups.automationRisk == 0, 'automationRisk'] = 1
    job_groups.automationRisk.fillna(0, inplace=True)

    job_groups = job_groups.join(
        local_diagnosis.load_prefixed_info_from_airtable(
            job_groups.index, info_by_prefix_airtable))

    # Add related job groups.
    career_jumps = pd.read_csv(career_jumps_csv, dtype='str')
    safe_career_jumps = career_jumps[
        career_jumps.target_job_group.map(job_groups.covidRisk) != 'COVID_RISKY']
    job_groups['relatedJobGroups'] = safe_career_jumps\
        .join(job_groups, on='job_group')\
        .join(job_groups, on='target_job_group', lsuffix='_source')\
        .groupby(['romeId_source'])\
        .apply(lambda df: [
            {
                'jobGroup': target_job,
                'mobilityType': 'CLOSE',
            }
            for target_job in df[['romeId', 'name', 'automationRisk']].to_dict('records')])
    # Fill NaN with empty [].
    job_groups['relatedJobGroups'] = job_groups.relatedJobGroups.apply(
        lambda s: s if isinstance(s, list) else [])

    # Add best counties.
    # TODO(cyrille): Rename field to a more generic area name.
    job_groups['departementScores'] = market_score_derivatives.get_less_stressful_districts(
        local_stats, max_districts=20)
    # Fill NaN with empty [].
    job_groups['departementScores'] = job_groups.departementScores.apply(
        lambda s: s if isinstance(s, list) else [])

    # Add occupation requirements from json file.
    with open(occupation_requirements_json, encoding='utf-8') as job_requirements_file:
        job_requirements_list = json.load(job_requirements_file)
    job_requirements_dict = {
        job_requirement.pop('_id'): job_requirement
        for job_requirement in job_requirements_list
    }
    job_groups['requirements'] = job_groups.index.map(job_requirements_dict)
    # Replace NaN by empty dicts.
    job_groups['requirements'] = job_groups.requirements.apply(
        lambda r: r if isinstance(r, dict) else {})

    # SkillsForFuture
    skills_for_future_by_rome = airtable_to_protos.load_items_from_prefix(
        'Skill', job_groups.index, skills_for_future_airtable, 'soc_prefixes_uk')
    if skills_for_future_by_rome:
        with translation.Translator() as translator:
            translated_skills_for_future_by_rome = {
                rome_id: [
                    skill | translator.ensure_translate_fields(
                        skill, locale='en_UK', fields=_SKILL_18N_FIELDS)
                    for skill in skills
                ]
                for rome_id, skills in skills_for_future_by_rome.items()
            }
        job_groups['skillsForFuture'] = job_groups.index.map(
            translated_skills_for_future_by_rome)

    return typing.cast(list[dict[str, Any]], job_groups.reset_index().to_dict('records'))
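# Illustrative sketch only (not part of the source): one way the UK make_dicts
# above might be invoked. Every file path and Airtable reference below is a
# hypothetical placeholder; only the keyword arguments come from the signature.
if __name__ == '__main__':
    uk_job_group_dicts = make_dicts(
        postings_csv='data/uk/job_postings.csv',  # hypothetical path
        occupations_csv='data/uk/occupations.csv',  # hypothetical path
        jobs_xls='data/uk/soc2010.xls',  # hypothetical path
        soc2010_js='data/uk/soc2010_descriptions.js',  # hypothetical path
        career_jumps_csv='data/uk/career_jumps.csv',  # hypothetical path
        automation_xls='data/uk/automation.xls',  # hypothetical path
        info_by_prefix_airtable='appXXXXXXXX:tblXXXXXXXX',  # hypothetical Airtable ref
        occupation_requirements_json='data/uk/occupation_requirements.json',  # hypothetical path
        skills_for_future_airtable='appXXXXXXXX:tblXXXXXXXX',  # hypothetical Airtable ref
    )
    # Each record is a plain dict keyed for MongoDB (romeId as _id).
    print(f'{len(uk_job_group_dicts)} job group documents prepared')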
def test_fallback(self) -> None:
    """Fallback locale on ensure_translate."""

    with translation.Translator() as translator:
        self.assertEqual(
            'English', translator.ensure_translate('my language', 'en_UK'))
def test_ensure_translate(self) -> None:
    """Basic usage of ensure_translate."""

    with translation.Translator() as translator:
        self.assertEqual(
            'United Kingdom', translator.ensure_translate('country', 'en_UK'))
def test_missing_key(self) -> None:
    """Missing translation on ensure_translate."""

    with self.assertRaises(KeyError):
        with translation.Translator() as translator:
            translator.ensure_translate('sacrebleu', 'en_UK')