def get(cls, language):
    """Resolve *language* (a 2/3-letter code, or a name when pycountry is
    available) to a Language tuple.

    Uses pycountry when the module-level PYCOUNTRY flag is set, otherwise
    the iso639 package.

    :raises LookupError: when the code/name cannot be resolved.
    """
    try:
        if PYCOUNTRY:
            # lookup workaround for alpha_2 language codes: bare two-letter
            # codes are resolved explicitly via alpha_2, everything else
            # goes through the generic lookup().
            lang = languages.get(alpha_2=language) if re.match(
                r"^[a-z]{2}$", language) else languages.lookup(language)
            # Entries without a bibliographic code yield None for the
            # fourth field.
            return Language(lang.alpha_2, lang.alpha_3, lang.name,
                            getattr(lang, "bibliographic", None))
        else:
            lang = None
            if len(language) == 2:
                lang = languages.get(alpha2=language)
            elif len(language) == 3:
                # Try each ISO 639-2/3 code system in order.
                for code_type in ['part2b', 'part2t', 'part3']:
                    try:
                        lang = languages.get(**{code_type: language})
                        break
                    except KeyError:
                        pass
                if not lang:
                    raise KeyError(language)
            else:
                raise KeyError(language)
            return Language(lang.alpha2, lang.part3, lang.name,
                            lang.part2b or lang.part2t)
    except (LookupError, KeyError):
        raise LookupError("Invalid language code: {0}".format(language))
def _has_valid_datatypes(self):
    """Assumes a valid bag/bag info; returns true if all datatypes in bag pass"""
    # Collect the bag-info values that must parse as ISO 8601 dates.
    date_values = [
        value for key, value in self.bag_info_data.items()
        if key in self.bag_dates_to_validate
    ]
    for date_value in date_values:
        try:
            iso8601.parse_date(date_value)
        except Exception:
            print("Invalid date value: {}".format(date_value))
            self.bag_exception = "Invalid date value: {}".format(date_value)
            return False
    # Language may be absent, a single code, or a list of codes.
    lang_values = self.bag_info_data.get("Language", None)
    if lang_values:
        if not isinstance(lang_values, list):
            lang_values = [lang_values]
        for lang_code in lang_values:
            try:
                languages.get(part2b=lang_code)
            except KeyError:
                print("Invalid language value: {}".format(lang_code))
                self.bag_exception = "Invalid language value: {}".format(lang_code)
                return False
    return True
async def tr(e):
    """Telegram handler: translate the replied-to message or the inline text.

    Target language comes from the pattern group (reply case) or an inline
    'to=<code>' token; defaults to 'en'. Replies with the detected source
    language name and the translated text.
    """
    s = e.pattern_match.group(1)
    if e.is_reply:
        # Translate the message being replied to; pattern group (if any)
        # selects the target language.
        s = await e.get_reply_message()
        s = s.message
        if e.pattern_match.group(1):
            to = e.pattern_match.group(1)
        else:
            to = 'en'
        text = trans.translate(s, dest=to)
        # NOTE(review): assumes text.src is a bare ISO 639-1 code — region
        # tags like 'zh-cn' would raise KeyError here; confirm.
        frm = languages.get(part1=text.src).name
        await e.reply('From: '+frm+'\n'+text.text)
        return
    # Inline text: optionally pull a 'to=xx' token out of the message.
    to = re.findall(r"to=\w+", s)
    try:
        to = to[0]
        to = to.replace('to=', '')
        s = s.replace('to='+to+' ', '')
        print(s)
        print('to='+to)
    except IndexError:
        # No 'to=' token present; fall back to English.
        to = 'en'
    try:
        text = trans.translate(s, dest=to)
    except:
        await e.edit("Maybe wrong code name")
        return
    frm = languages.get(part1=text.src).name
    await e.reply('From: '+frm+'\n'+text.text)
def l2l3(l):
    """convert the language name

    >>> l2l3('en')
    ('eng', 'English')
    >>> l2l3('as')
    ('apc', 'Arabic, Syrian')
    >>> l2l3('ar')
    ('arb', 'Arabic')
    """
    # Project-specific overrides that don't follow the plain alpha2 lookup.
    overrides = {
        'pb': ('por', 'Por., Brazil'),
        'ms': ('zsm', 'Malay'),
        'as': ('apc', 'Arabic, Syrian'),
        'ar': ('arb', 'Arabic'),
        'zh': ('cmn', 'Chinese, Mandarin'),
    }
    try:
        if l in overrides:
            l3, language = overrides[l]
        else:
            entry = languages.get(alpha2=l)
            language = entry.name
            l3 = entry.part3
    except:
        # Unknown code: echo it back with the 'unk' sentinel.
        language = l
        l3 = 'unk'
    return l3, language
def convert_language_code(lang, to_format, throw_exceptions=True):
    '''
    Convert ISO 639 language code to <to_format>. Throws KeyError if none found.

    :param throw_exceptions: Set to False to never throw KeyError.
    :param lang: original language code
    :param to_format: 'alpha2' or 'alpha3'
    '''
    # Map the friendly names onto the iso639 attribute names.
    mappings = {'alpha2': 'part1', 'alpha3': 'part2b'}
    if to_format in mappings:
        to_format = mappings[to_format]
    # BUG FIX: the original ended its fallback chain with `except None:`
    # when throw_exceptions was True, which raised
    # "TypeError: catching classes that do not inherit from BaseException"
    # instead of the intended KeyError.
    catch = KeyError if throw_exceptions else Exception
    last_error = None
    # Try the code systems in the original order of preference.
    for code_type in ('part2b', 'part3', 'part1'):
        try:
            return getattr(languages.get(**{code_type: lang}), to_format)
        except catch as err:
            last_error = err
    if throw_exceptions:
        raise last_error
    return ''
def test_extract_subs(self):
    """End-to-end: scan a sample MKV and verify the extracted subtitle set."""
    tmp_dir = tempfile.mkdtemp()
    file = os.path.join(tmp_dir, 'fragment.mkv')
    # Work on a copy so the fixture file stays untouched.
    shutil.copyfile('fragment.mkv', file)
    with Storage(':memory:') as storage:
        # Wanted languages ru/en/fr; merge pair (ru, en).
        app_run_config = AppRunConfig(
            tmp_dir,
            [languages.get(part1=x) for x in ['ru', 'en', 'fr']],
            [(languages.get(part1='ru'), languages.get(part1='en'))],
            ".*", {}, False, False)
        extract_subs = ExtractSubs(app_run_config, storage)
        extract_subs.scan_files()
        video_files = storage.get_all_video_files()
        self.assertEqual(1, len(video_files))
        fragment_file = video_files[0]
        subtitles = storage.get_all_subtitles_by_video_file_id(fragment_file['id'])
        self.assertEqual(5, len(subtitles))
        subtitles_file_names = set(
            os.path.basename(subtitle['full_path']) for subtitle in subtitles)
        self.assertEqual({'fragment_eng_SDH.srt', 'fragment_fra.srt',
                          'fragment.rus_eng.ass', 'fragment_rus_Forced.srt',
                          'fragment_rus.srt'}, subtitles_file_names)
        # 1 - cause forced subtitles is empty, file is empty, can't merge
        self.assertEqual(1, len(storage.get_all_merged_subtitles_by_video_file_id(fragment_file['id'])))
    shutil.rmtree(tmp_dir, ignore_errors=True)
def setup_wizard(_: dict):
    """Return wizard steps for setting metadata variables."""
    # Build the selectable language list; fall back to the raw code when
    # the iso639 part3 index has no entry for it.
    choices = []
    for code in registry.languages:
        label = languages.get(part3=code).name if code in languages.part3 else code
        choices.append({"value": code, "name": label})
    choices.sort(key=lambda choice: choice["name"])
    default_choice = {
        "value": "swe",
        "name": languages.get(part3="swe").name
    }
    return [
        {
            "type": "text",
            "name": "metadata.id",
            "message": "Machine name of corpus (a-z, 0-9, -):",
            "validate": lambda x: bool(re.match(r"^[a-z0-9-]+$", x))
        },
        {
            "type": "text",
            "name": "metadata.name.eng",
            "message": "Human readable name of corpus:"
        },
        {
            "type": "select",
            "name": "metadata.language",
            "message": "What language are your source files?",
            "choices": choices,
            "default": default_choice
        },
    ]
def get(cls, language):
    """Resolve *language* to a Language tuple via pycountry (when the
    PYCOUNTRY flag is set) or the iso639 package.

    :raises LookupError: for any code/name that cannot be resolved.
    """
    try:
        if PYCOUNTRY:
            # Consistency fix, matching the sibling implementation of this
            # function elsewhere in the codebase: resolve bare two-letter
            # codes explicitly via alpha_2 instead of the generic lookup().
            import re  # local import keeps the fix self-contained
            if re.match(r"^[a-z]{2}$", language):
                c = languages.get(alpha_2=language)
            else:
                c = languages.lookup(language)
            # Entries without a bibliographic code yield None here.
            return Language(c.alpha_2, c.alpha_3, c.name,
                            getattr(c, "bibliographic", None))
        else:
            l = None
            if len(language) == 2:
                l = languages.get(alpha2=language)
            elif len(language) == 3:
                # Try each ISO 639-2/3 code system in order.
                for code_type in ['part2b', 'part2t', 'part3']:
                    try:
                        l = languages.get(**{code_type: language})
                        break
                    except KeyError:
                        pass
                if not l:
                    raise KeyError(language)
            else:
                raise KeyError(language)
            return Language(l.alpha2, l.part3, l.name, l.part2b or l.part2t)
    except (LookupError, KeyError):
        raise LookupError("Invalid language code: {0}".format(language))
def get_comic_info(tmp_file_path, original_file_name, original_file_extension):
    """Read ComicRack (CIX) or ComicBookLover (CBI) metadata from a comic
    archive and build a BookMeta record for the uploader.
    """
    archive = ComicArchive(tmp_file_path)
    if archive.seemsToBeAComicArchive():
        # Prefer ComicRack metadata over ComicBookLover.
        if archive.hasMetadata(MetaDataStyle.CIX):
            style = MetaDataStyle.CIX
        elif archive.hasMetadata(MetaDataStyle.CBI):
            style = MetaDataStyle.CBI
        else:
            style = None
        if style is not None:
            loadedMetadata = archive.readMetadata(style)
            lang = loadedMetadata.language
            # NOTE(review): assumes lang is a string; a None value would
            # crash on len() — confirm against the archive reader.
            if len(lang) == 2:
                loadedMetadata.language = isoLanguages.get(part1=lang).name
            elif len(lang) == 3:
                loadedMetadata.language = isoLanguages.get(part3=lang).name
            else:
                loadedMetadata.language = ""
            return uploader.BookMeta(
                file_path=tmp_file_path,
                extension=original_file_extension,
                title=loadedMetadata.title or original_file_name,
                # Join all credited writers; fall back to "Unknown".
                author=" & ".join([
                    credit["person"] for credit in loadedMetadata.credits
                    if credit["role"] == "Writer"
                ]) or u"Unknown",
                cover=extractCover(tmp_file_path, original_file_extension),
                description=loadedMetadata.comments or "",
                tags="",
                series=loadedMetadata.series or "",
                series_id=loadedMetadata.issue or "",
                languages=loadedMetadata.language)
def get_iso_lang_data(lang: str) -> Tuple[Dict, Union[Dict, None]]:
    """ISO-639-x languages details for lang. Raises NotFound

    Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
    english, iso_types

    See get_language_details() for details"""
    # Which lookup fields (partN / name) match the supplied value?
    candidate_fields = [f"part{level}" for level in ISO_LEVELS]
    candidate_fields.append("name")
    matching_fields = []
    for field in candidate_fields:
        try:
            iso639_languages.get(**{field: lang})
        except KeyError:
            continue
        matching_fields.append(field)
    if not matching_fields:
        raise NotFound("Not a valid iso language name/code")
    # Resolve once using the first matching field.
    entry = iso639_languages.get(**{matching_fields[0]: lang})
    details = {
        f"iso-639-{level}": getattr(entry, f"part{level}") for level in ISO_LEVELS
    }
    details["english"] = entry.name
    details["iso_types"] = matching_fields
    if entry.macro:
        # Recurse once so the macrolanguage itself is described too.
        return details, get_iso_lang_data(entry.macro)[0]
    return details, None
def gen_translation_str_from_multilang_field(fieldkey, message, key, data, errors, context):
    '''
    Fetch all the lang* fields e.g. for fieldkey 'title' of type
    ('langtitle', n, 'lang'): u'en', ('langtitle', n, 'value'): u'translation'
    and generate a JSON translation string of type
    title: {'en':'translation', 'fi':'kaannos'}

    This converter is called only once for the hidden field where the data
    is then stored.

    :param fieldkey: 'title' or 'notes' currently
    :param message: translation string for parse error message
    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''
    langkey = 'lang' + fieldkey
    # For API requests, we need to validate if the
    # data is already given in the new format, and
    # no lang* fields given. In that case, do nothing.
    if data.get((fieldkey,)) and not data.get((langkey, 0, 'lang')):
        json_string = data.get((fieldkey,))
        json_data = {}
        try:
            json_data = json.loads(json_string)
        except (ValueError, TypeError):
            errors[key].append(message)
        # we also need to validate the keys:
        try:
            for k in json_data.keys():
                if k == "undefined":
                    # some harvesters don't have languages defined
                    continue
                try:
                    languages.get(part3=k)
                except KeyError:
                    errors[key].append(_('The language code is not in ISO639-3 format'))
        except AttributeError:
            # json.loads produced a non-dict (e.g. a list or scalar).
            errors[key].append(_("The given {field} string is incorrectly formatted".format(field=fieldkey)))
        return
    json_data = {}
    # loop through all the translations
    i = 0
    while data.get((langkey, i, 'lang'), []):
        lval = data[(langkey, i, 'lang')]
        rval = data[(langkey, i, 'value')]
        if rval:
            # skip a language without translation
            json_data[lval] = rval
        i += 1
    if json_data:
        data[(fieldkey,)] = json.dumps(json_data)
def test_translate_text_with_original_lang(text, target_lang, original_lang):
    """Translation returns the expected text, and the detected source/target
    languages match the declared language names (resolved to alpha2 codes).
    """
    answer = translate_text(text, target_lang, original_lang)
    assert "Hello my friend" == answer
    # Source text should be detected as the declared original language.
    assert TextBlob(text).detect_language() == languages.get(
        name=original_lang.capitalize()).alpha2
    # Translated text should be detected as the target language.
    assert TextBlob(answer).detect_language() == languages.get(
        name=target_lang.capitalize()).alpha2
def iso_coding(arr):
    """Return the English language name for a 2- or 3-letter ISO 639 code.

    :param arr: language code ('en' -> part1 lookup, 'eng' -> part3 lookup)
    :return: the language name, or the '\\N' sentinel when the code is
        unknown or has an unsupported length.
    """
    try:
        if len(arr) == 2:
            return languages.get(part1=arr).name
        elif len(arr) == 3:
            return languages.get(part3=arr).name
        # BUG FIX: codes of any other length previously fell through and
        # returned None; return the same sentinel as the error path.
        return '\\N'
    except Exception:
        # Unknown code, or arr is not a sized string at all.
        return '\\N'
def __init__(self, name):
    """Store *name* and try to resolve it to an iso639 language entry
    (2 letters -> part1, 3 letters -> part2t, otherwise by name).

    Falls back to the raw string when the lookup fails.
    """
    self.name = name
    try:
        # Pick the lookup field based on the code length.
        field = {2: 'part1', 3: 'part2t'}.get(len(name), 'name')
        self.lang = languages.get(**{field: name})
    except Exception:
        self.lang = name
def get_subtitle_language(subtitle_filename):
    """Guess the subtitle language from a trailing ISO code in an .srt
    file name; returns the part2b code, or None when neither a trailing
    3-letter (part2b) nor 2-letter (part1) code matches.
    """
    lowered = subtitle_filename.lower()
    assert lowered.endswith(".srt")
    stem = os.path.splitext(lowered)[0]
    # Prefer a trailing 3-letter bibliographic code, then a 2-letter code.
    for tail_length, field in ((3, 'part2b'), (2, 'part1')):
        candidate = stem[-tail_length:]
        try:
            return languages.get(**{field: candidate}).part2b
        except KeyError:
            continue
    return None
def verbose_language(origin: Optional[str]) -> str:
    """Return a human-readable language name for *origin*.

    Resolution order: ISO lookup (2-letter alpha2 / 3-letter part3),
    then the EXTRA_LANGUAGES table, then the raw code itself.
    """
    if origin is None:
        return "unknown origin"
    # Compute the fallback first so unknown ISO codes keep their
    # EXTRA_LANGUAGES mapping.
    language = EXTRA_LANGUAGES.get(origin, origin)
    try:
        if len(origin) == 2:
            language = languages.get(alpha2=origin).name
        elif len(origin) == 3:
            language = languages.get(part3=origin).name
        else:
            print("???", origin)
    except KeyError:
        # BUG FIX: the original reset `language = origin` here, which
        # threw away the EXTRA_LANGUAGES mapping computed above; keep it.
        pass
    return language
def test_alternative_name(self):
    """Alternative names all resolve to the same part3 code."""
    cases = [
        ('Romanian', 'ron'),
        ('Moldavian', 'ron'),
        ('Moldovan', 'ron'),
        ('Dimili', 'zza'),
        ('Dimli (macrolanguage)', 'zza'),
        ('Kirdki', 'zza'),
        ('Kirmanjki (macrolanguage)', 'zza'),
        ('Zaza', 'zza'),
        ('Zazaki', 'zza'),
    ]
    for alt_name, expected_part3 in cases:
        self.assertEqual(languages.get(name=alt_name).part3, expected_part3)
def get_language(cls, language):
    """Resolve a 2- or 3-letter ISO 639 code to an iso639 language entry.

    :raises ValueError: when the code has the wrong length or is unknown.
    """
    code_length = len(language)
    try:
        if code_length == 2:
            return languages.get(alpha2=language)
        if code_length == 3:
            # Try each 3-letter code system in order.
            for field in ('part2b', 'part2t', 'part3'):
                try:
                    return languages.get(**{field: language})
                except KeyError:
                    continue
            raise KeyError
        raise ValueError("Invalid language code: {0}".format(language))
    except KeyError:
        raise ValueError("Invalid language code: {0}".format(language))
def test_Lang():
    #Lang, GlossaryFrom, GlossaryTo
    Lang = 'en'
    try:
        Lang = pytest.config[pytest.modulename]['Lang']
    except:
        assert True
    # Strip any region suffix, e.g. 'en-US' -> 'en'.
    lang = re.sub(r'\-.*', r'', Lang)
    pytest.lang = False
    # Try every ISO 639 code system in turn until one resolves.
    for code_field in ('part1', 'part2b', 'part2t', 'part3', 'part5'):
        try:
            pytest.lang = languages.get(**{code_field: lang})
            break
        except:
            pass
    if len(lang) in (2, 3):
        assert pytest.lang
    else:
        assert True  # let this test pass as we cannot check everything
def parse(self, options):
    """Extract the control number, modification timestamp and record
    language from the MARC record's control/data fields.
    """
    # 001
    self.control_number = self.record.text('mx:controlfield[@tag="001"]')
    # 010 : If present, it takes precedence over 001.
    # <https://github.com/scriptotek/mc2skos/issues/42>
    value = self.record.text('mx:datafield[@tag="010"]/mx:subfield[@code="a"]')
    if value is not None:
        self.control_number = value
    # 016 : If present, it takes precedence over 001
    # <https://github.com/scriptotek/mc2skos/issues/42>
    value = self.record.text('mx:datafield[@tag="016"]/mx:subfield[@code="a"]')
    if value is not None:
        self.control_number = value
    # 003
    self.control_number_identifier = self.record.text('mx:controlfield[@tag="003"]')
    # 005
    value = self.record.text('mx:controlfield[@tag="005"]')
    if value is not None:
        try:
            self.modified = datetime.strptime(value, '%Y%m%d%H%M%S.%f')
        except ValueError:
            logger.warning('Record %s: Ignoring invalid date in 005 field: %s',
                           self.control_number, value)
    # 040: Record Source
    lang = self.record.text('mx:datafield[@tag="040"]/mx:subfield[@code="b"]') or 'eng'
    # NOTE(review): an unknown 040 $b code raises KeyError here, and a
    # language without a part1 code yields None — confirm both are intended.
    self.lang = languages.get(part2b=lang).part1
def getiso6392t(s: str) -> str:
    """Convert a locale-style tag (e.g. 'en', 'en_US', 'en-GB') to its
    ISO 639-2/T code; returns the input unchanged on any failure.
    """
    # BUG FIX: the original split on '_' and then re-split the *original*
    # string on '-', discarding the first split entirely; strip both
    # separators in sequence.
    base = s.split('_')[0]
    base = base.split('-')[0]
    try:
        return languages.get(alpha2=base).part2t
    except Exception:
        return s
def __init__(self, language_code, country_code=""):
    """Validate and store a language code plus an optional country code,
    normalizing the language to its ISO 639-3 form.
    """
    # -- type checks -------------------------------------------------
    if not isinstance(language_code, str):
        raise TypeError("'language_code' must be a string")
    if not isinstance(country_code, str):
        raise TypeError("'country_code' must be a string")
    # -- length checks -----------------------------------------------
    if len(language_code) not in (2, 3):
        raise ValueError(
            "'language_code' must be a 3-character or 2-character string")
    if country_code != "" and len(country_code) != 2:
        raise ValueError("'country_code' must be a 2-character string")
    # -- character-set checks ----------------------------------------
    if any(ch not in ALPHABET for ch in language_code):
        raise TypeError(
            "'language_code' must contains non-capital alphabet character"
        )
    if country_code != "" and any(ch not in CAPITAL_ALPHABET for ch in country_code):
        raise TypeError(
            "'country_code' must contain capital alphabet character"
        )
    #check and convert ISO 639-1 to ISO 639-3
    if len(language_code) == 3:
        resolved = language_code
    else:
        resolved = languages.get(alpha2=language_code).part3
    self.__lang_code = resolved
    self.__country_code = country_code
def test_acronym(self):
    # Expects acronym to be converted to 450 $a, having $g d
    voc = Vocabulary()
    voc.resources.load([
        {
            'id': '1',
            'prefLabel': {'nb': {
                'value': 'Forente nasjoner',
                'hasAcronym': 'FN'
            }},
            'type': ['Topic']
        }
    ])
    voc.default_language = languages.get(alpha2='nb')
    m21 = Marc21(voc)
    tree = etree.parse(BytesIO(m21.serialize()))
    # The preferred label should land in field 150 $a ...
    f150 = tree.xpath('//m:record/m:datafield[@tag="150"]' +
                      '[./m:subfield[@code="a"]/text() = "Forente nasjoner"]',
                      namespaces={'m': 'http://www.loc.gov/MARC21/slim'})
    # ... and the acronym in a 450 entry flagged with $g = "d".
    f450 = tree.xpath('//m:record/m:datafield[@tag="450"]' +
                      '[./m:subfield[@code="a"]/text() = "FN"]' +
                      '[./m:subfield[@code="g"]/text() = "d"]',
                      namespaces={'m': 'http://www.loc.gov/MARC21/slim'})
    self.assertEqual(1, len(f150))
    self.assertEqual(1, len(f450))
def process_json_tweets(rank, file_name, processes, trend_type):
    """Count hashtag or language frequencies for this worker's share
    (round-robin by rank over `processes` workers) of a JSON tweet dump.

    Returns a dict of counts built by findHash/findLang.
    """
    # Open the json file containing all the tweets
    with open(file_name, 'r', encoding="utf-8") as f:
        # Stream items lazily instead of loading the whole dump.
        objs = ijson.items(f, 'rows.item')
        outDic = {}
        try:
            for i, line in enumerate(objs):
                if i % processes == rank:
                    try:
                        if trend_type == TREND_TYPE_HASHTAG:
                            # Count frequency of hashtags
                            tweet = line['doc']["text"]
                            tweet = re.split('[!"$%&\'()*+,-./:;<=>?@[\\]^ `{|}~]', tweet)
                            #tweet = tweet.split()
                            outDic = findHash(tweet, outDic)
                        elif trend_type == TREND_TYPE_LANGUAGE:
                            # Count frequency of languages
                            lang = line["doc"]["metadata"]["iso_language_code"]
                            try:
                                lang = languages.get(alpha2=lang).name + "(" + lang + ")"
                            except KeyError:
                                lang = "Undefined" + "(" + lang + ")"
                            outDic = findLang(lang, outDic)
                    except ValueError:
                        print("Malformed JSON in tweet", i)
                    except:
                        # Anything unexpected: report and re-raise.
                        print("Unexpected error:", sys.exc_info()[0])
                        raise
        except TypeError:
            print("Could not read line in json.")
    return outDic
def get_language_from_wiktionary_code(code):
    """Resolve a Wiktionary language code to a language entry, trying
    ISO 639-1, then 639-3, then 639-2T, then (after stripping any
    hyphenated suffix) ISO 639-5.

    Returns None when nothing matches.
    """
    lang = None
    try:
        lang = languages.get(iso639_1_code=code)
    except KeyError:
        pass
    if not lang:
        try:
            lang = languages.get(iso639_3_code=code)
        except KeyError:
            pass
    if not lang:
        try:
            lang = languages.get(iso639_2T_code=code)
        except KeyError:
            pass
    if not lang:
        # Strip a suffix such as 'gem-pro' -> 'gem'.
        code = code.split('-')[0]
        try:
            # NOTE(review): `iso` is a different name than the `languages`
            # object used above — confirm both are imported as intended.
            lang = iso.get(part5=code)
            # Mirror the attribute the ISO 639-3 entries carry.
            setattr(lang, 'iso639_3_code', lang.part3)
        except KeyError:
            # print("no lang for: ", code)
            pass
    return lang
def get_def(self, part3):
    """Look up an iso639 language entry by its ISO 639-3 code.

    Returns None when the code is unknown.
    """
    from iso639 import languages
    try:
        return languages.get(part3=part3)
    except KeyError:
        return None
def api_data(self):
    """Serialize this lemmatized text for the JSON API."""
    return {
        "id": self.pk,
        "title": self.title,
        "lang": self.lang,
        # Human-readable name resolved from the ISO 639-3 code.
        "language": languages.get(part3=self.lang).name,
        "completed": self.completed,
        "tokenCount": self.token_count(),
        "lemmatizationStatus": self.lemmatization_status(),
        "createdAt": self.created_at,
        "canRetry": self.can_retry(),
        "canCancel": self.can_cancel(),
        "deleteUrl": self.delete_url,
        "cloneUrl": self.clone_url,
        "clonedFrom": self.cloned_from.pk if self.cloned_from else None,
        "clonedFor": self.cloned_for.pk if self.cloned_for else None,
        # A text with classes attached must be cloned before editing.
        "requireClone": self.classes.all().count() > 0,
        "handoutUrl": reverse("lemmatized_texts_handout", args=[self.secret_id]),
    }
def printStats(lngs, lexes, syns):
    """Print a LaTeX table of per-language wordnet statistics
    (synset/lemma/sense counts, names for 'Asia', most common lemma)
    followed by a totals row.
    """
    totlem = 0
    totsense = 0
    synsets = set()
    print("""\\begin{tabular}{llrrrll}
Language & Code & Synsets & Lemmas & Senses & Asia & Most Common\\\\
\\hline""")
    for lng in sorted(lngs):
        # Names for asia gid=6255147
        asia = ', '.join([x[1] for x in lemmas['6255147'] if x[2] == lng]) or '---'
        # Names for most common
        mx = max(len(lexes[lng][g]) for g in lexes[lng])
        maxes = [g for g in lexes[lng] if len(lexes[lng][g]) == mx]
        if len(maxes) > 3 or len(maxes) < 1:
            # BUG FIX: this was `common == '---'` — a no-op comparison that
            # left `common` unbound and crashed on the print below.
            common = '---'
        else:
            common = ', '.join(maxes)
        numsenses = sum(len(lexes[lng][l]) for l in lexes[lng])
        print("{} & {} & {:,d} & {:,d} & {:,d} & {} & {} \\\\ ".format(
            languages.get(alpha2=lng).name, lng, len(syns[lng]),
            len(lexes[lng]), numsenses, asia, common))
        totlem += len(lexes[lng])
        totsense += numsenses
        synsets = synsets.union(set(syns[lng]))
    print("{} & {} & {:,d} & {:,d} & {:,d} & {} & {} \\\\ ".format(
        'Total', len(lngs), len(synsets), totlem, totsense, '---', '---'))
    print("""\\end{tabular}""")
async def gtr(self, ctx, language, *, text: str):
    """Discord command: translate *text* into *language*, where the target
    is given by its full English name (e.g. 'english'), not a code.
    """
    language = language.capitalize()
    try:
        try:
            # Resolve the English language name to an iso639 entry.
            lang = languages.get(name=language)
            g = async_google_trans_new.google_translator()
            gemb = discord.Embed(
                title='Google Translation',
                color=self.bot.embed_color,
                timestamp=ctx.message.created_at).set_footer(
                    text=f"Requested by {ctx.author}",
                    icon_url=ctx.author.avatar_url)
            gemb.add_field(name='Input:', value=f'```\n{text}\n```')
            gemb.add_field(
                name=f'Output in {language}:',
                value=f'```\n{await g.translate(text, lang.alpha2)}\n```',
                inline=False)
            await ctx.send(embed=gemb)
        except KeyError:
            # Unknown language name.
            await qembed.send(ctx, 'Language not found.')
    except TypeError:
        await qembed.send(
            ctx, 'This is different from other translate commands. In this, you actually say the language. `en` becomes `english`.'
        )
def getLangage(self, languageCode):
    """Map a two-letter language code to a [code, name] pair.

    A few legacy/undetermined codes collapse to 'und'.
    """
    if languageCode in ("und", "in", "iw", "ckb"):
        return ["und", "und"]
    return [languageCode, languages.get(alpha2=languageCode).name]
def test_logic_part3(self):
    """map_language resolves part3 codes and names to the cached entries."""
    self.assertIs(map_language('ary'), self.moroccan)
    self.assertIs(languages.part3['ary'], self.moroccan)
    self.assertIs(languages.get(part3='ary'), self.moroccan)
    self.assertIs(map_language('Moroccan Arabic'), self.moroccan)
    self.assertIs(map_language('Tzeltal'), self.tzeltal)
    # The inverted-name form maps to the same entry.
    self.assertIs(map_language('Tzeltal, Tenejapa'), self.tzeltal)
    self.assertIs(map_language('tzh'), self.tzeltal)
def convert_language(lang):
    '''
    Convert alpha2 language (eg. 'en') to terminology language (eg. 'eng')
    '''
    # Empty/None input maps to the 'undetermined' code.
    if not lang:
        return "und"
    # Try a part1 (alpha2) lookup first, then part2b; '' when both fail.
    for field in ('part1', 'part2b'):
        try:
            return languages.get(**{field: lang}).terminology
        except KeyError:
            continue
    return ''
def filename_language(p):
    """Extract (language_code, forced_flag) from a subtitle path stem of
    the form '<name>-<lang>[.forced]'.

    Returns (None, forced) when the code is not a valid part2b code.
    """
    stem = p.stem
    tail = str(stem).rsplit('-', maxsplit=1)[-1]
    parts = tail.rsplit('.', maxsplit=1)
    lang = parts[0]
    # '.forced' suffix, or 'forced' anywhere in the stem, marks forced subs.
    forced = parts[-1] == 'forced' or 'forced' in stem.casefold()
    try:
        languages.get(part2b=lang)
    except KeyError:
        return None, forced
    return lang, forced
def map_language(language, dash3=True):
    """ Use ISO 639-3 ??

    Resolve *language* (a 2/3-letter code or a name, optionally with a
    '_REGION' suffix) to a language entry; returns None when nothing
    matches.

    :param dash3: when True use the iso639 package, otherwise pycountry
        (the two expose different lookup keyword names).
    """
    if dash3:
        from iso639 import languages
    else:
        from pycountry import languages
    # Strip a region suffix such as 'en_US' -> 'en'.
    if '_' in language:
        language = language.split('_')[0]
    if len(language) == 2:
        try:
            return languages.get(alpha2=language.lower())
        except KeyError:
            pass
    elif len(language) == 3:
        if dash3:
            try:
                return languages.get(part3=language.lower())
            except KeyError:
                pass
        # NOTE(review): terminology/bibliographic are pycountry keywords —
        # confirm they also fail cleanly under iso639 when dash3 is True.
        try:
            return languages.get(terminology=language.lower())
        except KeyError:
            pass
        try:
            return languages.get(bibliographic=language.lower())
        except KeyError:
            pass
    else:
        # Fall back to name-based lookups for anything longer.
        try:
            return languages.get(name=language.title())
        except KeyError:
            pass
        if dash3:
            try:
                return languages.get(inverted=language.title())
            except KeyError:
                pass
        # Last resort: try each word of a multi-word name on its own.
        for l in re.split('[,.;: ]+', language):
            try:
                return languages.get(name=l.title())
            except KeyError:
                pass
def get_language(lang):
    '''
    Resolves the complete language name from a given language code

    :param lang: language code in iso format
    :return: the language name, or the code itself when no lookup succeeds
    '''
    # Try each code system in turn; fall back to echoing the code.
    for field in ('part2b', 'part3', 'part1'):
        try:
            return languages.get(**{field: lang}).name
        except:
            continue
    return lang
def convert_language(self, lang):
    """Convert an alpha2 language code (eg. 'en') to its terminology form
    (eg. 'eng'); returns '' for unknown codes.
    """
    try:
        return languages.get(part1=lang).terminology
    except KeyError as ke:
        # TODO: Parse ISO 639-2 B/T ?
        log.debug('Invalid language: {ke}'.format(ke=ke))
        return ''
def load(self, filename):
    """Populate the vocabulary from a UTF-8 JSON file.

    Recognized top-level keys: 'uri_format', 'default_language'
    (an alpha-2 code), and 'resources'.
    """
    # FIX: close the file handle deterministically — the original left the
    # codecs.open() handle to the garbage collector.
    with codecs.open(filename, 'r', 'utf-8') as fp:
        data = json.load(fp)
    if 'uri_format' in data:
        self.vocabulary.uri_format = data['uri_format']
    if 'default_language' in data:
        self.vocabulary.default_language = languages.get(alpha2=data['default_language'])
    if 'resources' in data:
        self.vocabulary.resources.load(data['resources'])
def convert_languages(key, data, errors, context):
    '''
    Convert ISO 639-2 B and 639-3 language abbreviations to ISO 639-2 T.
    data['key'] may be a string with comma separated values or a single
    language code.

    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''
    value = data.get(key)
    # NOTE: basestring implies this validator targets Python 2.
    if not isinstance(value, basestring):
        return
    new_languages = []
    for lang in value.split(','):
        lang = lang.strip().lower()
        if lang:
            try:
                # Valid 639-2 B code: kept as-is.
                languages.get(part2b=lang)
                new_languages.append(lang)
            except KeyError:
                try:
                    # Valid 639-3 code: also kept as-is.
                    languages.get(part3=lang)
                    new_languages.append(lang)
                except KeyError:
                    try:
                        # Convert two character language codes
                        lang_object = languages.get(part1=lang)
                        new_languages.append(lang_object.part2t)
                    except KeyError as ke:
                        errors[key].append(_('Language %s not in ISO 639-2 T format') % lang)
                        # We could still try to convert from ISO 639-2 B if it shows up somewhere
    if new_languages:
        data[key] = ', '.join(new_languages)
def test_multiple_types(self):
    # A concept with two types should generate two records
    voc = Vocabulary()
    voc.default_language = languages.get(alpha2='nb')
    voc.resources.load([{
        'id': '1',
        'prefLabel': {'nb': {'value': 'Science fiction'}},
        'type': ['GenreForm', 'Topic']
    }])
    m21 = Marc21(voc)
    tree = etree.parse(BytesIO(m21.serialize()))
    # Count the MARC records in the serialized output.
    c = tree.xpath('count(//m:record)', namespaces={'m': 'http://www.loc.gov/MARC21/slim'})
    self.assertEqual(2, c)
def test_load(self):
    # Should accept a Vocabulary object
    c = [
        {
            'id': '1',
            'prefLabel': {'nb': {
                'value': 'Forente nasjoner'
            }},
            'type': ['Topic']
        }
    ]
    voc = Vocabulary()
    voc.resources.load(c)
    voc.default_language = languages.get(alpha2='nb')
    m21 = Marc21(voc)
    # The resources collection should be exposed unchanged.
    self.assertEqual(Resources, type(m21.vocabulary.resources))
# Seed the `lang` table with BCP 47 / ISO 639 code pairs for a fixed set
# of languages, skipping codes that are already present.
con = sqlite3.connect(dbfile)
c = con.cursor()
known = dict()
c.execute("""SELECT id, iso639 from lang""")
for (lid, l3) in c:
    known[l3] = lid
for l3 in "eng cmn".split():
# for l3 in "eng als arb bul cmn dan ell fas fin fra heb hrv ita jpn cat eus glg spa ind zsm nno nob pol por slv swe tha aar afr aka amh asm aze bam bel ben bod bos bre ces cor cym deu dzo epo est ewe fao ful gla gle glv guj hau hin hun hye ibo iii ina isl kal kan kat kaz khm kik kin kir kor lao lav lin lit lub lug mal mar mkd mlg mlt mon mya nbl nde nep nld oci ori orm pan pus roh ron run rus sag sin slk sme sna som sot srp ssw swa tam tel tgk tir ton tsn tso tur ukr urd uzb ven vie xho yor zul ang arz ast chr fry fur grc hat hbs ido kur lat ltg ltz mri nan nav rup san scn srd tat tgl tuk vol yid yue".split():
    if l3 in known:  ### already in
        continue
    l = languages.get(part3=l3)
    if l.part1:  ### use the two letter code if it exists
        bcp47 = l.part1
    else:
        bcp47 = l3
    # INSERT LANG DATA (CODES AND NAMES)
    u = 'omw'
    c.execute("""INSERT INTO lang (bcp47, iso639, u) VALUES (?,?,?)""",
              (bcp47, l3, u))
    # Fetch the id of the row just inserted.
    c.execute("""SELECT MAX(id) FROM lang""")
    lang_id = c.fetchone()[0]
def setUpClass(cls):
    """Resolve and cache the language entries used throughout the tests."""
    fixtures = (
        ('english', 'English'),
        ('chinese', 'Chinese'),
        ('arabic', 'Arabic'),
        ('moroccan', 'Moroccan Arabic'),
        ('tzeltal', 'Tzeltal'),
    )
    for attr, lang_name in fixtures:
        setattr(cls, attr, languages.get(name=lang_name))
def test_part3(self):
    """get(part3=...) and the part3 index agree for 'eng'."""
    expected = 'English'
    self.assertEqual(expected, languages.get(part3='eng').name)
    self.assertEqual(expected, languages.part3['eng'].name)
def test_inverted(self):
    """Inverted-name lookups resolve 'Arabic, Moroccan' to Moroccan Arabic."""
    inverted_name = 'Arabic, Moroccan'
    expected = 'Moroccan Arabic'
    self.assertEqual(expected, languages.get(inverted=inverted_name).name)
    self.assertEqual(expected, languages.inverted[inverted_name].name)
def test_collective_name(self):
    """Collective (language-family) names expose matching part2b and part5 codes."""
    for family_name, code in (('Bihari languages', 'bih'), ('Sami languages', 'smi')):
        entry = languages.get(name=family_name)
        self.assertEqual(entry.part2b, code)
        self.assertEqual(entry.part5, code)
def test_macro_name(self):
    # TODO: self.assertEqual(languages.get(name='Standard Estonian').macro, languages.get(alpha3='est'))
    # The macro attribute currently holds the bare code string, not an entry.
    self.assertEqual(languages.get(name='Standard Estonian').macro, 'est')
def test_part2b(self):
    """Bibliographic (part2b) code 'dut' resolves to Dutch both ways."""
    expected = 'Dutch'
    self.assertEqual(expected, languages.get(part2b='dut').name)
    self.assertEqual(expected, languages.part2b['dut'].name)
def test_name(self):
    """Name lookups work via get(), the name index, and with non-ASCII names."""
    self.assertEqual('eng', languages.get(name='English').part3)
    self.assertEqual('eng', languages.name['English'].part3)
    # Name containing a non-ASCII character.
    assert languages.get(name='Sanapaná').part3 == 'spn'
def test_retired_code(self):
    """Retired codes expose a (date, successors, comment) tuple via both
    get(retired=...) and the retired index."""
    # TODO: self.assertEqual(languages.get(alpha3='ron').retired, 'mol')
    assert languages.get(part3='ron') in languages.get(retired='mol')[1]
    assert languages.part3['ron'] in languages.retired['mol'][1]
    ppr = languages.get(retired='ppr')
    assert isinstance(ppr[2], str)
    assert isinstance(languages.retired['ppr'][2], str)
    assert isinstance(ppr[0], datetime)
    assert isinstance(languages.retired['ppr'][0], datetime)
    assert ppr[1] == []
    assert languages.retired['ppr'][1] == []
    # A retired code with a single successor maps to the same entry object.
    assert languages.get(retired='sh') is languages.get(part3='hbs')
    assert languages.retired['sh'] is languages.part3['hbs']
    ccy_successors = [languages.get(part3=code)
                      for code in ('zhn', 'zyg', 'zyn', 'zzj', 'zhd')]
    assert languages.get(retired='ccy')[1] == ccy_successors
def test_part1(self):
    """part1 code 'en' resolves to English via get() and the part1 index."""
    expected = 'English'
    self.assertEqual(expected, languages.get(part1='en').name)
    self.assertEqual(expected, languages.part1['en'].name)
def test_part2t(self):
    """Terminology (part2t) code 'nld' resolves to Dutch both ways."""
    expected = 'Dutch'
    self.assertEqual(expected, languages.get(part2t='nld').name)
    self.assertEqual(expected, languages.part2t['nld'].name)