Example #1
0
 def get(cls, language):
     """Resolve a language code to a ``Language`` record.

     When ``PYCOUNTRY`` is truthy the pycountry-style API of ``languages``
     is used (``alpha_2`` / ``lookup``); otherwise the iso639-style API
     (``alpha2`` / ``part2b`` / ``part2t`` / ``part3``) is used.

     :param language: language code to resolve; in the non-pycountry branch
         only two- and three-character codes are accepted.
     :return: ``Language(alpha2, alpha3, name, bibliographic)``.
     :raises LookupError: if the code cannot be resolved.
     """
     try:
         if PYCOUNTRY:
             # lookup workaround for alpha_2 language codes
             if re.match(r"^[a-z]{2}$", language):
                 lang = languages.get(alpha_2=language)
                 # get() may hand back None for an unknown key instead of
                 # raising; turn that into the documented LookupError rather
                 # than an AttributeError on the attribute access below.
                 if lang is None:
                     raise KeyError(language)
             else:
                 lang = languages.lookup(language)
             return Language(lang.alpha_2, lang.alpha_3, lang.name,
                             getattr(lang, "bibliographic", None))
         else:
             lang = None
             if len(language) == 2:
                 lang = languages.get(alpha2=language)
             elif len(language) == 3:
                 # Try every three-letter code family until one matches.
                 for code_type in ['part2b', 'part2t', 'part3']:
                     try:
                         lang = languages.get(**{code_type: language})
                         break
                     except KeyError:
                         pass
                 if not lang:
                     raise KeyError(language)
             else:
                 raise KeyError(language)
             return Language(lang.alpha2, lang.part3, lang.name, lang.part2b
                             or lang.part2t)
     except (LookupError, KeyError):
         raise LookupError("Invalid language code: {0}".format(language))
Example #2
0
    def _has_valid_datatypes(self):
        """Check the date and language values found in the bag info.

        Assumes a valid bag/bag info. On the first invalid value the problem
        is printed, stored in ``self.bag_exception`` and ``False`` is
        returned; ``True`` means every checked value passed.
        """
        date_values = [
            value for field, value in self.bag_info_data.items()
            if field in self.bag_dates_to_validate
        ]
        language_values = self.bag_info_data.get("Language", None)

        for date_value in date_values:
            try:
                iso8601.parse_date(date_value)
            except Exception:
                problem = "Invalid date value: {}".format(date_value)
                print(problem)
                self.bag_exception = problem
                return False

        if not language_values:
            return True

        # A single language is treated as a one-element list.
        if not isinstance(language_values, list):
            language_values = [language_values]
        for code in language_values:
            try:
                languages.get(part2b=code)
            except KeyError:
                problem = "Invalid language value: {}".format(code)
                print(problem)
                self.bag_exception = problem
                return False
        return True
Example #3
0
async def tr(e):
    """Translate a message and reply with the source language and result.

    If the triggering event is a reply, the replied-to message is translated
    and the command argument (when present) is used as the target language.
    Otherwise the command argument itself is translated; a ``to=<code>``
    token inside it selects the target language. The target defaults to
    ``'en'``.

    :param e: event object providing ``pattern_match``, ``is_reply``,
        ``get_reply_message()``, ``reply()`` and ``edit()``.
    """

    def source_name(code):
        # The detected source code is not guaranteed to be a known
        # ISO 639-1 code; show the raw code instead of crashing the handler.
        try:
            return languages.get(part1=code).name
        except KeyError:
            return code

    s = e.pattern_match.group(1)
    if e.is_reply:
        s = await e.get_reply_message()
        s = s.message
        to = e.pattern_match.group(1) or 'en'
        text = trans.translate(s, dest=to)
        await e.reply('From: ' + source_name(text.src) + '\n' + text.text)
        return

    targets = re.findall(r"to=\w+", s)
    if targets:
        to = targets[0].replace('to=', '')
        # Strip the "to=<code> " token from the text to translate.
        s = s.replace('to=' + to + ' ', '')
        print(s)
        print('to=' + to)
    else:
        to = 'en'

    try:
        text = trans.translate(s, dest=to)
    except Exception:
        # Not a bare except: task cancellation must keep propagating.
        await e.edit("Maybe wrong code name")
        return
    await e.reply('From: ' + source_name(text.src) + '\n' + text.text)
Example #4
0
def l2l3(l):
    """Convert a two-letter language code to ``(three-letter code, name)``.

    A handful of codes are mapped by hand; any other code is looked up as an
    alpha-2 code in ``languages``. When that lookup fails the result is
    ``('unk', l)``.

    >>> l2l3('en')
    ('eng', 'English')
    >>> l2l3('as')
    ('apc', 'Arabic, Syrian')
    >>> l2l3('ar')
    ('arb', 'Arabic')
    """
    # Hand-maintained overrides that take precedence over the registry.
    overrides = {
        'pb': ('por', 'Por., Brazil'),
        'ms': ('zsm', 'Malay'),
        'as': ('apc', 'Arabic, Syrian'),
        'ar': ('arb', 'Arabic'),
        'zh': ('cmn', 'Chinese, Mandarin'),
    }
    try:
        if l in overrides:
            return overrides[l]
        entry = languages.get(alpha2=l)
        return entry.part3, entry.name
    except Exception:
        # Best effort: unknown codes are reported as-is with the 'unk' tag.
        return 'unk', l
Example #5
0
def convert_language_code(lang, to_format, throw_exceptions=True):
    '''
    Convert ISO 639 language code to <to_format>. Throws KeyError if none found.

    The code is tried as ``part2b``, then ``part3``, then ``part1``.

    :param throw_exceptions: Set to False to never throw KeyError; an empty
                             string is returned instead.
    :param lang: original language code
    :param to_format: 'alpha2' or 'alpha3' (any other value is used as the
                      attribute name directly)
    '''

    mappings = {'alpha2': 'part1', 'alpha3': 'part2b'}
    to_format = mappings.get(to_format, to_format)

    # In strict mode only a failed lookup moves on to the next code family;
    # in lenient mode any error does.
    recoverable = KeyError if throw_exceptions else Exception

    for code_type in ('part2b', 'part3'):
        try:
            return getattr(languages.get(**{code_type: lang}), to_format)
        except recoverable:
            continue

    try:
        return getattr(languages.get(part1=lang), to_format)
    except Exception:
        # The previous ``except None`` raised TypeError on Python 3 instead
        # of letting the KeyError through; re-raise explicitly.
        if throw_exceptions:
            raise
        return ''
Example #6
0
    def test_extract_subs(self):
        """Scan a copy of ``fragment.mkv`` and check the extracted subtitles.

        The video is copied into a fresh temporary directory, which is
        removed again even when an assertion fails.
        """
        tmp_dir = tempfile.mkdtemp()
        try:
            video_path = os.path.join(tmp_dir, 'fragment.mkv')
            shutil.copyfile('fragment.mkv', video_path)
            with Storage(':memory:') as storage:
                app_run_config = AppRunConfig(tmp_dir, [languages.get(part1=x) for x in ['ru', 'en', 'fr']],
                                              [(languages.get(part1='ru'), languages.get(part1='en'))], ".*", {}, False,
                                              False)

                extract_subs = ExtractSubs(app_run_config, storage)
                extract_subs.scan_files()

                video_files = storage.get_all_video_files()
                self.assertEqual(1, len(video_files))
                fragment_file = video_files[0]
                subtitles = storage.get_all_subtitles_by_video_file_id(fragment_file['id'])
                self.assertEqual(5, len(subtitles))
                subtitles_file_names = {os.path.basename(subtitle['full_path']) for subtitle in subtitles}
                self.assertEqual({'fragment_eng_SDH.srt', 'fragment_fra.srt', 'fragment.rus_eng.ass',
                                  'fragment_rus_Forced.srt', 'fragment_rus.srt'}, subtitles_file_names)
                # 1 - cause forced subtitles is empty, file is empty, can't merge
                self.assertEqual(1, len(storage.get_all_merged_subtitles_by_video_file_id(fragment_file['id'])))
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)
Example #7
0
def setup_wizard(_: dict):
    """Return wizard steps for setting metadata variables."""

    def display_name(code):
        # Codes missing from the part3 table are shown unchanged.
        if code in languages.part3:
            return languages.get(part3=code).name
        return code

    language_list = sorted(
        [{"value": code, "name": display_name(code)} for code in registry.languages],
        key=lambda choice: choice["name"],
    )
    language_default = {"value": "swe", "name": languages.get(part3="swe").name}

    corpus_id = {
        "type": "text",
        "name": "metadata.id",
        "message": "Machine name of corpus (a-z, 0-9, -):",
        "validate": lambda x: bool(re.match(r"^[a-z0-9-]+$", x)),
    }
    corpus_name = {
        "type": "text",
        "name": "metadata.name.eng",
        "message": "Human readable name of corpus:",
    }
    corpus_language = {
        "type": "select",
        "name": "metadata.language",
        "message": "What language are your source files?",
        "choices": language_list,
        "default": language_default,
    }
    return [corpus_id, corpus_name, corpus_language]
Example #8
0
 def get(cls, language):
     """Resolve a language code to a ``Language`` record.

     Uses ``languages.lookup`` when ``PYCOUNTRY`` is truthy; otherwise
     two-letter codes are looked up as ``alpha2`` and three-letter codes as
     ``part2b``, ``part2t`` or ``part3`` (first hit wins).

     :raises LookupError: if the code cannot be resolved.
     """
     try:
         if not PYCOUNTRY:
             size = len(language)
             if size not in (2, 3):
                 raise KeyError(language)
             if size == 2:
                 entry = languages.get(alpha2=language)
             else:
                 entry = None
                 for field in ('part2b', 'part2t', 'part3'):
                     try:
                         entry = languages.get(**{field: language})
                     except KeyError:
                         continue
                     break
                 if not entry:
                     raise KeyError(language)
             bibliographic = entry.part2b or entry.part2t
             return Language(entry.alpha2, entry.part3, entry.name,
                             bibliographic)

         country_entry = languages.lookup(language)
         return Language(country_entry.alpha_2, country_entry.alpha_3,
                         country_entry.name,
                         getattr(country_entry, "bibliographic", None))
     except (LookupError, KeyError):
         raise LookupError("Invalid language code: {0}".format(language))
Example #9
0
def convert_language_code(lang, to_format, throw_exceptions=True):
    '''
    Convert ISO 639 language code to <to_format>. Throws KeyError if none found.

    The code is tried as ``part2b``, then ``part3``, then ``part1``.

    :param throw_exceptions: Set to False to never throw KeyError; an empty
                             string is returned instead.
    :param lang: original language code
    :param to_format: 'alpha2' or 'alpha3' (any other value is used as the
                      attribute name directly)
    '''

    mappings = {'alpha2': 'part1', 'alpha3': 'part2b'}
    to_format = mappings.get(to_format, to_format)

    # In strict mode only a failed lookup moves on to the next code family;
    # in lenient mode any error does.
    recoverable = KeyError if throw_exceptions else Exception

    for code_type in ('part2b', 'part3'):
        try:
            return getattr(languages.get(**{code_type: lang}), to_format)
        except recoverable:
            continue

    try:
        return getattr(languages.get(part1=lang), to_format)
    except Exception:
        # The previous ``except None`` raised TypeError on Python 3 instead
        # of letting the KeyError through; re-raise explicitly.
        if throw_exceptions:
            raise
        return ''
Example #10
0
def get_comic_info(tmp_file_path, original_file_name, original_file_extension):
    """Build a ``BookMeta`` for an uploaded comic archive.

    Embedded metadata is read in CIX style when present, otherwise CBI. When
    the file is not recognised as a comic archive or carries neither style,
    a ``BookMeta`` with default values is returned instead of failing on an
    unbound metadata variable.

    :param tmp_file_path: path of the uploaded file.
    :param original_file_name: used as title when no title is available.
    :param original_file_extension: extension passed through to ``BookMeta``.
    :return: ``uploader.BookMeta`` instance.
    """
    loadedMetadata = None
    archive = ComicArchive(tmp_file_path)
    if archive.seemsToBeAComicArchive():
        if archive.hasMetadata(MetaDataStyle.CIX):
            style = MetaDataStyle.CIX
        elif archive.hasMetadata(MetaDataStyle.CBI):
            style = MetaDataStyle.CBI
        else:
            style = None

        if style is not None:
            loadedMetadata = archive.readMetadata(style)

    if loadedMetadata is None:
        # Nothing to read: fall back to file-derived defaults.
        return uploader.BookMeta(
            file_path=tmp_file_path,
            extension=original_file_extension,
            title=original_file_name,
            author=u"Unknown",
            cover=extractCover(tmp_file_path, original_file_extension),
            description="",
            tags="",
            series="",
            series_id="",
            languages="")

    # Replace the language code by its name; unknown, missing or oddly
    # sized codes become an empty string.
    lang = loadedMetadata.language or ""
    try:
        if len(lang) == 2:
            loadedMetadata.language = isoLanguages.get(part1=lang).name
        elif len(lang) == 3:
            loadedMetadata.language = isoLanguages.get(part3=lang).name
        else:
            loadedMetadata.language = ""
    except KeyError:
        loadedMetadata.language = ""

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=loadedMetadata.title or original_file_name,
        author=" & ".join([
            credit["person"]
            for credit in loadedMetadata.credits if credit["role"] == "Writer"
        ]) or u"Unknown",
        cover=extractCover(tmp_file_path, original_file_extension),
        description=loadedMetadata.comments or "",
        tags="",
        series=loadedMetadata.series or "",
        series_id=loadedMetadata.issue or "",
        languages=loadedMetadata.language)
Example #11
0
def get_iso_lang_data(lang: str) -> Tuple[Dict, Union[Dict, None]]:
    """Return ISO-639-x details for ``lang``. Raises NotFound.

    ``lang`` is tried against every ``part<level>`` key in ``ISO_LEVELS``
    and finally against ``name``; each key that matches is recorded in
    ``iso_types`` and the first match supplies the details.

    Included keys: iso-639-<level> for each level in ISO_LEVELS,
                   english, iso_types

    Returns a ``(details, macro_details)`` tuple where ``macro_details`` is
    the details dict of the macro language, or None when there is none.

    See get_language_details() for details"""

    candidates = [f"part{level}" for level in ISO_LEVELS]
    candidates.append("name")

    iso_types = []
    language = None
    for code_type in candidates:
        try:
            match = iso639_languages.get(**{code_type: lang})
        except KeyError:
            continue
        iso_types.append(code_type)
        if len(iso_types) == 1:
            # The first matching key decides which entry is described.
            language = match

    if not iso_types:
        raise NotFound("Not a valid iso language name/code")

    lang_data = {}
    for level in ISO_LEVELS:
        lang_data[f"iso-639-{level}"] = getattr(language, f"part{level}")
    lang_data["english"] = language.name
    lang_data["iso_types"] = iso_types

    if not language.macro:
        return lang_data, None
    # Only the macro language's own details are kept, not its macro in turn.
    macro_data = get_iso_lang_data(language.macro)[0]
    return lang_data, macro_data
Example #12
0
def gen_translation_str_from_multilang_field(fieldkey, message, key, data, errors, context):
    '''
    Collect the lang* entries of a multilingual field, e.g. for fieldkey
    'title':
    ('langtitle', n, 'lang'): u'en',
    ('langtitle', n, 'value'): u'translation'

    and store them under (fieldkey,) as a JSON translation string such as
    {"en": "translation", "fi": "kaannos"}

    If (fieldkey,) already holds a value and no lang* entries are given
    (API requests using the new format), the existing JSON string is only
    validated and nothing is rewritten.

    :param fieldkey: 'title' or 'notes' currently
    :param message: translation string for parse error message
    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''
    prefix = 'lang' + fieldkey
    target = (fieldkey,)

    if data.get(target) and not data.get((prefix, 0, 'lang')):
        # Already in the new format: validate only.
        try:
            parsed = json.loads(data.get(target))
        except (ValueError, TypeError):
            parsed = {}
            errors[key].append(message)

        # The keys must be language codes as well.
        try:
            for code in parsed.keys():
                # some harvesters don't have languages defined
                if code != "undefined":
                    try:
                        languages.get(part3=code)
                    except KeyError:
                        errors[key].append(_('The language code is not in ISO639-3 format'))
        except AttributeError:
            errors[key].append(_("The given {field} string is incorrectly formatted".format(field=fieldkey)))
        return

    # Walk the numbered translations until the first missing/empty language.
    translations = {}
    index = 0
    while True:
        language_code = data.get((prefix, index, 'lang'), [])
        if not language_code:
            break
        text = data[(prefix, index, 'value')]
        if text:    # skip a language without translation
            translations[language_code] = text
        index += 1

    if translations:
        data[target] = json.dumps(translations)
Example #13
0
def test_translate_text_with_original_lang(text, target_lang, original_lang):
    """Translate ``text`` and check the result and both detected languages."""
    answer = translate_text(text, target_lang, original_lang)
    assert "Hello my friend" == answer

    # The detected language must match the alpha2 code of the named language.
    for sample, language_name in ((text, original_lang), (answer, target_lang)):
        detected = TextBlob(sample).detect_language()
        expected = languages.get(name=language_name.capitalize()).alpha2
        assert detected == expected
Example #14
0
def iso_coding(arr):
    """Return the language name for a two- or three-letter ISO 639 code.

    Two-letter codes are looked up as ``part1``, three-letter codes as
    ``part3``.

    :param arr: language code.
    :return: the language name; the literal ``\\N`` marker when the lookup
        (or ``len(arr)``) fails; ``None`` for any other code length.
    """
    try:
        if len(arr) == 2:
            return languages.get(part1=arr).name
        if len(arr) == 3:
            return languages.get(part3=arr).name
    except Exception:
        # Best effort, but without swallowing KeyboardInterrupt/SystemExit.
        return '\\N'
    return None
Example #15
0
 def __init__(self, name):
     """Store ``name`` and resolve it to a language entry.

     Two-character names are looked up as ``part1``, three-character names
     as ``part2t`` and anything else by ``name``. If resolution fails for
     any reason, ``self.lang`` falls back to the raw ``name``.
     """
     self.name = name
     try:
         size = len(name)
         if size == 2:
             resolved = languages.get(part1=name)
         elif size == 3:
             resolved = languages.get(part2t=name)
         else:
             resolved = languages.get(name=name)
     except Exception:
         resolved = name
     self.lang = resolved
Example #16
0
def get_subtitle_language(subtitle_filename):
    """Derive the ``part2b`` language code from an ``.srt`` file name.

    The last three characters before the extension are tried as a
    ``part2b`` code, then the last two as a ``part1`` code.

    :return: the ``part2b`` code, or None when neither suffix is known.
    """
    subtitle_filename = subtitle_filename.lower()
    assert subtitle_filename.endswith(".srt")
    stem = os.path.splitext(subtitle_filename)[0]

    attempts = (("part2b", stem[-3:]), ("part1", stem[-2:]))
    for code_type, suffix in attempts:
        try:
            return languages.get(**{code_type: suffix}).part2b
        except KeyError:
            continue
    return None
Example #17
0
def verbose_language(origin: Optional[str]) -> str:
    """Return a human-readable language name for ``origin``.

    Two-letter codes are looked up as ``alpha2`` and three-letter codes as
    ``part3``. When the registry does not know the code, the entry from
    ``EXTRA_LANGUAGES`` is used, and failing that the code itself.

    :param origin: language code, or None.
    :return: the language name, or ``"unknown origin"`` for None.
    """
    if origin is None:
        return "unknown origin"

    # Fallback used when the registry lookup below fails.
    language = EXTRA_LANGUAGES.get(origin, origin)
    try:
        if len(origin) == 2:
            language = languages.get(alpha2=origin).name
        elif len(origin) == 3:
            language = languages.get(part3=origin).name
        else:
            print("???", origin)
    except KeyError:
        # Keep the EXTRA_LANGUAGES fallback; resetting to ``origin`` here
        # would throw that mapping away.
        pass
    return language
Example #18
0
    def test_alternative_name(self):
        """Every alternative name resolves to the expected part3 code."""
        expected_part3 = (
            ('Romanian', 'ron'),
            ('Moldavian', 'ron'),
            ('Moldovan', 'ron'),
            ('Dimili', 'zza'),
            ('Dimli (macrolanguage)', 'zza'),
            ('Kirdki', 'zza'),
            ('Kirmanjki (macrolanguage)', 'zza'),
            ('Zaza', 'zza'),
            ('Zazaki', 'zza'),
        )
        for language_name, code in expected_part3:
            self.assertEqual(languages.get(name=language_name).part3, code)
Example #19
0
    def test_alternative_name(self):
        """Every alternative name resolves to the expected part3 code."""
        expected_part3 = (
            ('Romanian', 'ron'),
            ('Moldavian', 'ron'),
            ('Moldovan', 'ron'),
            ('Dimili', 'zza'),
            ('Dimli (macrolanguage)', 'zza'),
            ('Kirdki', 'zza'),
            ('Kirmanjki (macrolanguage)', 'zza'),
            ('Zaza', 'zza'),
            ('Zazaki', 'zza'),
        )
        for language_name, code in expected_part3:
            self.assertEqual(languages.get(name=language_name).part3, code)
Example #20
0
 def get_language(cls, language):
     """Return the ``languages`` entry for a two- or three-letter code.

     Two-letter codes are looked up as ``alpha2``; three-letter codes as
     ``part2b``, ``part2t`` or ``part3`` (first hit wins).

     :raises ValueError: for unknown codes and for any other code length.
     """
     size = len(language)
     try:
         if size == 2:
             return languages.get(alpha2=language)
         if size == 3:
             for field in ('part2b', 'part2t', 'part3'):
                 try:
                     return languages.get(**{field: language})
                 except KeyError:
                     continue
             raise KeyError
     except KeyError:
         raise ValueError("Invalid language code: {0}".format(language))
     raise ValueError("Invalid language code: {0}".format(language))
Example #21
0
def test_Lang():
    """Check that the configured ``Lang`` resolves to a known language.

    ``Lang`` is read from ``pytest.config[pytest.modulename]`` and defaults
    to ``'en'``. Anything from the first ``-`` onwards is dropped, then the
    remainder is tried against each ISO 639 code family in turn. The result
    is stored in ``pytest.lang`` (``False`` when nothing matched). Only two-
    and three-letter codes are asserted on.
    """
    #Lang, GlossaryFrom, GlossaryTo
    Lang = 'en'
    try:
        Lang = pytest.config[pytest.modulename]['Lang']
    except Exception:
        # No usable configuration: keep the 'en' default.
        pass
    lang = re.sub(r'\-.*', r'', Lang)

    pytest.lang = False
    for code_type in ('part1', 'part2b', 'part2t', 'part3', 'part5'):
        try:
            pytest.lang = languages.get(**{code_type: lang})
        except Exception:
            continue
        break

    # Other lengths pass unchecked as we cannot check everything.
    if len(lang) in (2, 3):
        assert pytest.lang
Example #22
0
    def parse(self, options):
        """Read control number, identifier, modification time and language.

        Values are taken from ``self.record`` via its ``text()`` lookup and
        stored on ``self``. ``options`` is not used in this part of the
        method.
        """
        # 001
        self.control_number = self.record.text('mx:controlfield[@tag="001"]')

        # 010 and 016 : If present, each takes precedence over 001 (and 016
        # over 010).
        # <https://github.com/scriptotek/mc2skos/issues/42>
        for xpath in (
            'mx:datafield[@tag="010"]/mx:subfield[@code="a"]',
            'mx:datafield[@tag="016"]/mx:subfield[@code="a"]',
        ):
            override = self.record.text(xpath)
            if override is not None:
                self.control_number = override

        # 003
        self.control_number_identifier = self.record.text('mx:controlfield[@tag="003"]')

        # 005
        stamp = self.record.text('mx:controlfield[@tag="005"]')
        if stamp is not None:
            try:
                self.modified = datetime.strptime(stamp, '%Y%m%d%H%M%S.%f')
            except ValueError:
                logger.warning('Record %s: Ignoring invalid date in 005 field: %s', self.control_number, stamp)

        # 040: Record Source ('eng' when the subfield is missing or empty)
        source_lang = self.record.text('mx:datafield[@tag="040"]/mx:subfield[@code="b"]') or 'eng'
        self.lang = languages.get(part2b=source_lang).part1
Example #23
0
def getiso6392t(s: str) -> str:
    """Return the ISO 639-2/T code for a locale-style string.

    Everything from the first ``_`` or ``-`` onwards is ignored (so both
    ``en_US`` and ``en-US`` resolve via ``en``) and the remainder is looked
    up as an alpha-2 code.

    :param s: language or locale identifier.
    :return: the ``part2t`` code, or ``s`` unchanged when the lookup fails.
    """
    # Chain the splits: previously the '-' split was applied to ``s`` again,
    # discarding the result of the '_' split.
    primary = s.split('_')[0].split('-')[0]
    try:
        return languages.get(alpha2=primary).part2t
    except Exception:
        return s
Example #24
0
    def __init__(self, language_code, country_code=""):
        """Validate and store a language code with an optional country code.

        :param language_code: 2- or 3-character string made of characters
            from ``ALPHABET``; 2-character codes are converted to their
            ``part3`` code via ``languages``.
        :param country_code: empty, or a 2-character string made of
            characters from ``CAPITAL_ALPHABET``.
        :raises TypeError: for non-string arguments or disallowed characters.
        :raises ValueError: for arguments of the wrong length.
        """
        # validate input
        if not isinstance(language_code, str):
            raise TypeError("'language_code' must be a string")
        if not isinstance(country_code, str):
            raise TypeError("'country_code' must be a string")

        has_country = country_code != ""
        if len(language_code) not in (2, 3):
            raise ValueError(
                "'language_code' must be a 3-character or 2-character string")
        if has_country and len(country_code) != 2:
            raise ValueError("'country_code' must be a 2-character string")

        if any(char not in ALPHABET for char in language_code):
            raise TypeError(
                "'language_code' must contains non-capital alphabet character"
            )
        if has_country and any(char not in CAPITAL_ALPHABET for char in country_code):
            raise TypeError(
                "'country_code' must contain capital alphabet character"
            )

        # check and convert ISO 639-1 to ISO 639-3
        if len(language_code) != 3:
            language_code = languages.get(alpha2=language_code).part3

        self.__lang_code = language_code
        self.__country_code = country_code
    def test_acronym(self):
        """An acronym is serialized as a 450 field with $a and $g = d."""
        # Expects acronym to be converted to 450 $a, having $g d
        namespaces = {'m': 'http://www.loc.gov/MARC21/slim'}
        topic = {
            'id': '1',
            'prefLabel': {'nb': {
                'value': 'Forente nasjoner',
                'hasAcronym': 'FN'
            }},
            'type': ['Topic']
        }

        voc = Vocabulary()
        voc.resources.load([topic])
        voc.default_language = languages.get(alpha2='nb')

        serialized = Marc21(voc).serialize()
        tree = etree.parse(BytesIO(serialized))

        preferred_query = ('//m:record/m:datafield[@tag="150"]' +
                           '[./m:subfield[@code="a"]/text() = "Forente nasjoner"]')
        acronym_query = ('//m:record/m:datafield[@tag="450"]' +
                         '[./m:subfield[@code="a"]/text() = "FN"]' +
                         '[./m:subfield[@code="g"]/text() = "d"]')

        f150 = tree.xpath(preferred_query, namespaces=namespaces)
        f450 = tree.xpath(acronym_query, namespaces=namespaces)

        self.assertEqual(1, len(f150))
        self.assertEqual(1, len(f450))
def process_json_tweets(rank, file_name, processes, trend_type):
  """Count hashtags or languages over this worker's share of the tweets.

  Items under ``rows.item`` of the JSON file are streamed; only those whose
  index modulo ``processes`` equals ``rank`` are handled here.

  :param rank: index of this worker.
  :param file_name: path of the JSON file containing the tweets.
  :param processes: total number of workers.
  :param trend_type: ``TREND_TYPE_HASHTAG`` or ``TREND_TYPE_LANGUAGE``.
  :return: the dictionary built up by ``findHash`` / ``findLang``.
  """
  # Open the json file containing all the tweets
  with open(file_name, 'r', encoding = "utf-8") as f:
    rows = ijson.items(f, 'rows.item')
    outDic = {}

    try:
      for index, row in enumerate(rows):
        if index % processes != rank:
          continue
        try:
          if trend_type == TREND_TYPE_HASHTAG:
            # Count frequency of hashtags
            words = re.split('[!"$%&\'()*+,-./:;<=>?@[\\]^ `{|}~]', row['doc']["text"])
            outDic = findHash(words, outDic)
          elif trend_type == TREND_TYPE_LANGUAGE:
            # Count frequency of languages
            code = row["doc"]["metadata"]["iso_language_code"]
            try:
              label = languages.get(alpha2=code).name + "(" + code + ")"
            except KeyError:
              label = "Undefined" + "(" + code + ")"
            outDic = findLang(label, outDic)
        except ValueError:
          print("Malformed JSON in tweet", index)
        except:
          # Report, then let the error continue to the outer handler/caller.
          print("Unexpected error:", sys.exc_info()[0])
          raise
    except TypeError:
      print("Could not read line in json.")
  return outDic
def get_language_from_wiktionary_code(code):
    """Resolve a Wiktionary language code to a language object.

    The code is tried as ``iso639_1_code``, ``iso639_3_code`` and
    ``iso639_2T_code`` in ``languages``. If none yields a result, the part
    before the first ``-`` is looked up as ``part5`` in ``iso`` and the
    match gets an ``iso639_3_code`` attribute copied from its ``part3``.

    :return: the language object, or whatever the last lookup left behind
        (normally None) when nothing matched.
    """
    lang = None
    for field in ('iso639_1_code', 'iso639_3_code', 'iso639_2T_code'):
        try:
            lang = languages.get(**{field: code})
        except KeyError:
            continue
        if lang:
            return lang

    family_code = code.split('-')[0]
    try:
        lang = iso.get(part5=family_code)
        setattr(lang, 'iso639_3_code', lang.part3)
    except KeyError:
        # print("no lang for: ", code)
        pass
    return lang
Example #28
0
 def get_def(self, part3):
     """Return the iso639 entry for a ``part3`` code, or None if unknown."""
     from iso639 import languages
     try:
         return languages.get(part3=part3)
     except KeyError:
         return None
Example #29
0
 def api_data(self):
     """Return this object's fields as a dict with camelCase keys.

     ``language`` is the name registered for ``self.lang`` as a ``part3``
     code; ``clonedFrom`` / ``clonedFor`` are primary keys or None.
     """
     return {
         "id": self.pk,
         "title": self.title,
         "lang": self.lang,
         "language": languages.get(part3=self.lang).name,
         "completed": self.completed,
         "tokenCount": self.token_count(),
         "lemmatizationStatus": self.lemmatization_status(),
         "createdAt": self.created_at,
         "canRetry": self.can_retry(),
         "canCancel": self.can_cancel(),
         "deleteUrl": self.delete_url,
         "cloneUrl": self.clone_url,
         "clonedFrom": self.cloned_from.pk if self.cloned_from else None,
         "clonedFor": self.cloned_for.pk if self.cloned_for else None,
         "requireClone": self.classes.all().count() > 0,
         "handoutUrl": reverse("lemmatized_texts_handout",
                               args=[self.secret_id]),
     }
Example #30
0
def printStats(lngs, lexes, syns):
    """Print a LaTeX table with per-language statistics and a totals row.

    :param lngs: iterable of alpha-2 language codes.
    :param lexes: ``lexes[lng][lemma]`` is a sized collection of senses.
    :param syns: ``syns[lng]`` is an iterable of synsets for the language.

    Also reads the module-level ``lemmas`` mapping for the "Asia" column.
    """
    row = "{} &  {} & {:,d} & {:,d}  & {:,d} & {} & {} \\\\ "
    totlem = 0
    totsense = 0
    synsets = set()
    print("""\\begin{tabular}{llrrrll}
Language & Code & Synsets & Lemmas & Senses & Asia & Most Common\\\\ \\hline"""
          )
    for lng in sorted(lngs):
        # Names for asia gid=6255147
        asia = ', '.join([x[1]
                          for x in lemmas['6255147'] if x[2] == lng]) or '---'
        # Names for most common
        sense_counts = {g: len(lexes[lng][g]) for g in lexes[lng]}
        # default=0 keeps an empty lexicon from raising in max().
        mx = max(sense_counts.values(), default=0)
        maxes = [g for g, count in sense_counts.items() if count == mx]
        if len(maxes) > 3 or len(maxes) < 1:
            # Was ``common == '---'``: a comparison that assigned nothing.
            common = '---'
        else:
            common = ', '.join(maxes)
        numsenses = sum(sense_counts.values())
        print(row.format(
            languages.get(alpha2=lng).name, lng, len(syns[lng]),
            len(lexes[lng]), numsenses, asia, common))
        totlem += len(lexes[lng])
        totsense += numsenses
        synsets.update(syns[lng])
    print(row.format(
        'Total', len(lngs), len(synsets), totlem, totsense, '---', '---'))
    print("""\\end{tabular}""")
Example #31
0
 async def gtr(self, ctx, language, *, text: str):
     """Translate ``text`` into the language given by its name.

     The capitalized ``language`` is looked up by name in ``languages`` and
     its ``alpha2`` code is passed to the translator. The result is sent as
     an embed; a ``KeyError`` yields "Language not found." and a
     ``TypeError`` yields a usage hint.
     """
     language = language.capitalize()
     try:
         try:
             target = languages.get(name=language)
             translator = async_google_trans_new.google_translator()
             embed = discord.Embed(
                 title='Google Translation',
                 color=self.bot.embed_color,
                 timestamp=ctx.message.created_at,
             ).set_footer(
                 text=f"Requested by {ctx.author}",
                 icon_url=ctx.author.avatar_url,
             )
             embed.add_field(name='Input:', value=f'```\n{text}\n```')
             translated = await translator.translate(text, target.alpha2)
             embed.add_field(
                 name=f'Output in {language}:',
                 value=f'```\n{translated}\n```',
                 inline=False,
             )
             await ctx.send(embed=embed)
         except KeyError:
             await qembed.send(ctx, 'Language not found.')
     except TypeError:
         await qembed.send(
             ctx,
             'This is different from other translate commands. In this, you actually say the language. `en` becomes `english`.'
         )
Example #32
0
    def getLangage(self, languageCode):
        """Return ``[code, name]`` for an alpha-2 language code.

        The codes "und", "in", "iw" and "ckb" are all reported as
        ``["und", "und"]``; every other code is looked up in ``languages``.
        """
        if languageCode not in ["und", "in", "iw", "ckb"]:
            return [languageCode, languages.get(alpha2=languageCode).name]
        return ["und", "und"]
Example #33
0
 def test_logic_part3(self):
     """Codes and names resolve to the very same language objects."""
     moroccan = self.moroccan
     self.assertIs(map_language('ary'), moroccan)
     self.assertIs(languages.part3['ary'], moroccan)
     self.assertIs(languages.get(part3='ary'), moroccan)
     self.assertIs(map_language('Moroccan Arabic'), moroccan)

     for query in ('Tzeltal', 'Tzeltal, Tenejapa', 'tzh'):
         self.assertIs(map_language(query), self.tzeltal)
Example #34
0
def convert_language(lang):
    '''
    Convert alpha2 language (eg. 'en') to terminology language (eg. 'eng').

    The code is tried as ``part1`` first and as ``part2b`` second. A falsy
    ``lang`` gives "und"; a code unknown to both lookups gives ''.
    '''

    if not lang:
        return "und"

    for code_type in ('part1', 'part2b'):
        try:
            return languages.get(**{code_type: lang}).terminology
        except KeyError:
            continue
    return ''
Example #35
0
 def test_logic_part3(self):
     """Codes and names resolve to the very same language objects."""
     moroccan = self.moroccan
     self.assertIs(map_language('ary'), moroccan)
     self.assertIs(languages.part3['ary'], moroccan)
     self.assertIs(languages.get(part3='ary'), moroccan)
     self.assertIs(map_language('Moroccan Arabic'), moroccan)

     for query in ('Tzeltal', 'Tzeltal, Tenejapa', 'tzh'):
         self.assertIs(map_language(query), self.tzeltal)
Example #36
0
def convert_language(lang):
    '''
    Convert alpha2 language (eg. 'en') to terminology language (eg. 'eng').

    The code is tried as ``part1`` first and as ``part2b`` second. A falsy
    ``lang`` gives "und"; a code unknown to both lookups gives ''.
    '''

    if not lang:
        return "und"

    for code_type in ('part1', 'part2b'):
        try:
            return languages.get(**{code_type: lang}).terminology
        except KeyError:
            continue
    return ''
Example #37
0
def filename_language(p):
    """Extract ``(language, forced)`` from a path's stem.

    The text after the last ``-`` in the stem is split once on its last
    ``.``; the left part is the language candidate. ``forced`` is true when
    the right part equals ``forced`` or the case-folded stem contains
    ``forced`` anywhere.

    :param p: object with a ``stem`` attribute (e.g. a ``pathlib`` path).
    :return: ``(lang, forced)``; ``lang`` is None unless it is a known
        ``part2b`` code.
    """
    stem = p.stem
    tail = str(stem).rsplit('-', maxsplit=1)[-1]
    pieces = tail.rsplit('.', maxsplit=1)

    lang = pieces[0]
    forced = pieces[-1] == 'forced' or 'forced' in stem.casefold()

    try:
        languages.get(part2b=lang)
    except KeyError:
        return None, forced
    return lang, forced
Example #38
0
def map_language(language, dash3=True):
    """Map a language code or name to a language object.

    ``dash3`` selects the backend: ``iso639`` when true, ``pycountry``
    otherwise. Anything after the first ``_`` is dropped. Two-character
    input is tried as ``alpha2``; three-character input as ``part3``
    (iso639 only), ``terminology`` and ``bibliographic``; anything else as
    a title-cased ``name``, then ``inverted`` (iso639 only), then each
    token obtained by splitting on ``,.;:`` and spaces.

    :return: the result of the first lookup that does not raise
        ``KeyError``, or None when every lookup raises.
    """
    if dash3:
        from iso639 import languages
    else:
        from pycountry import languages

    language = language.split('_')[0]

    size = len(language)
    if size == 2:
        queries = [{'alpha2': language.lower()}]
    elif size == 3:
        code = language.lower()
        queries = [{'part3': code}] if dash3 else []
        queries.append({'terminology': code})
        queries.append({'bibliographic': code})
    else:
        title = language.title()
        queries = [{'name': title}]
        if dash3:
            queries.append({'inverted': title})
        queries.extend(
            {'name': token.title()} for token in re.split('[,.;: ]+', language))

    for query in queries:
        try:
            return languages.get(**query)
        except KeyError:
            continue
    return None
Example #39
0
def get_language(lang):
    '''
    Resolves the complete language name from a given language code

    The code is tried against the ``part2b``, ``part3`` and ``part1``
    indexes, in that order; the first hit wins.

    :param lang: language code in iso format
    :return: the language name, or ``lang`` itself when no index knows it

    '''

    for code_type in ('part2b', 'part3', 'part1'):
        try:
            return languages.get(**{code_type: lang}).name
        except (KeyError, TypeError):
            # KeyError: unknown code; TypeError: unhashable input.
            # Deliberately narrower than a bare ``except`` so that
            # KeyboardInterrupt, SystemExit and programming errors surface.
            continue
    return lang
Example #40
0
 def convert_language(self, lang):
     '''
     Translate an alpha2 language code (eg. 'en') into its terminology
     code (eg. 'eng'). An unknown code is logged at debug level and
     yields an empty string.
     '''
     try:
         match = languages.get(part1=lang)
     except KeyError as ke:
         # TODO: Parse ISO 639-2 B/T ?
         log.debug('Invalid language: {ke}'.format(ke=ke))
         return ''
     return match.terminology
Example #41
0
    def load(self, filename):
        """Populate ``self.vocabulary`` from a UTF-8 encoded JSON file.

        Recognised top-level keys (all optional):

        - ``uri_format``: stored on ``vocabulary.uri_format``
        - ``default_language``: resolved with ``languages.get(alpha2=...)``
          and stored on ``vocabulary.default_language``
        - ``resources``: handed to ``vocabulary.resources.load``

        :param filename: path of the JSON file to read
        """
        # The context manager closes the file even when parsing fails;
        # previously the handle was left for the garbage collector.
        with codecs.open(filename, 'r', 'utf-8') as handle:
            data = json.load(handle)

        if 'uri_format' in data:
            self.vocabulary.uri_format = data['uri_format']

        if 'default_language' in data:
            self.vocabulary.default_language = languages.get(alpha2=data['default_language'])

        if 'resources' in data:
            self.vocabulary.resources.load(data['resources'])
Example #42
0
def convert_languages(key, data, errors, context):
    '''
    Normalise the language codes stored under ``data[key]``.

    The value may be a single code or a comma separated list. Each entry is
    stripped and lower-cased; empty entries are ignored. An entry known as a
    ``part2b`` or ``part3`` code is kept as is, an entry known as a ``part1``
    code is replaced by its ``part2t`` code, and anything else adds a message
    to ``errors[key]``. When at least one entry was accepted, ``data[key]``
    is overwritten with the accepted codes joined by ', '.

    Non-string values are left untouched.

    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''

    value = data.get(key)

    if not isinstance(value, basestring):
        return

    accepted = []

    for token in value.split(','):
        code = token.strip().lower()
        if not code:
            continue

        for code_type in ('part2b', 'part3'):
            try:
                languages.get(**{code_type: code})
            except KeyError:
                continue
            accepted.append(code)
            break
        else:
            try:
                # Convert two character language codes
                accepted.append(languages.get(part1=code).part2t)
            except KeyError:
                errors[key].append(_('Language %s not in ISO 639-2 T format') % code)
                # We could still try to convert from ISO 639-2 B if it shows up somewhere

    if accepted:
        data[key] = ', '.join(accepted)
 def test_multiple_types(self):
     # A concept with two types should generate two records
     concept = {
         'id': '1',
         'prefLabel': {'nb': {'value': 'Science fiction'}},
         'type': ['GenreForm', 'Topic']
     }
     vocabulary = Vocabulary()
     vocabulary.default_language = languages.get(alpha2='nb')
     vocabulary.resources.load([concept])

     serializer = Marc21(vocabulary)
     document = etree.parse(BytesIO(serializer.serialize()))

     # Count the <record> elements in the MARC21 slim namespace.
     marc_ns = {'m': 'http://www.loc.gov/MARC21/slim'}
     record_count = document.xpath('count(//m:record)', namespaces=marc_ns)
     self.assertEqual(2, record_count)
Example #44
0
 def get(cls, language):
     """Build a ``Language`` from a language code.

     With ``PYCOUNTRY`` set the code is resolved through
     ``languages.lookup``. Otherwise a two-character code is looked up as
     ``alpha2`` and a three-character code as ``part2b``, ``part2t`` or
     ``part3`` (first hit wins); any other length is rejected.

     Raises ``LookupError`` with an "Invalid language code" message when
     the code cannot be resolved.
     """
     try:
         if PYCOUNTRY:
             found = languages.lookup(language)
             return Language(found.alpha_2, found.alpha_3, found.name,
                             getattr(found, "bibliographic", None))

         size = len(language)
         if size == 2:
             found = languages.get(alpha2=language)
         elif size == 3:
             found = None
             for code_type in ('part2b', 'part2t', 'part3'):
                 try:
                     found = languages.get(**{code_type: language})
                 except KeyError:
                     continue
                 break
             if not found:
                 raise KeyError(language)
         else:
             raise KeyError(language)

         return Language(found.alpha2, found.part3, found.name,
                         found.part2b or found.part2t)
     except (LookupError, KeyError):
         raise LookupError("Invalid language code: {0}".format(language))
    def test_load(self):
        # Should accept a Vocabulary object

        concept = {
            'id': '1',
            'prefLabel': {'nb': {'value': 'Forente nasjoner'}},
            'type': ['Topic']
        }
        vocabulary = Vocabulary()
        vocabulary.resources.load([concept])
        vocabulary.default_language = languages.get(alpha2='nb')

        serializer = Marc21(vocabulary)
        # The serializer should expose the vocabulary's Resources container.
        resources_type = type(serializer.vocabulary.resources)
        self.assertEqual(Resources, resources_type)
Example #46
0
# Open the SQLite database at `dbfile` (defined outside this chunk) and
# obtain a cursor that is reused for every statement below.
con = sqlite3.connect(dbfile)
c = con.cursor()


# Map each iso639 code already present in the `lang` table to its row id.
known = dict()
c.execute("""SELECT id, iso639 from lang""")
for (lid, l3) in c:
    known[l3] = lid

# Insert a `lang` row for every listed three-letter code that is not stored yet.
for l3 in "eng cmn".split():
# for l3 in "eng als arb bul cmn dan ell fas fin fra heb hrv ita jpn cat eus glg spa ind zsm nno nob pol por slv swe tha aar afr aka amh asm aze bam bel ben bod bos bre ces cor cym deu dzo epo est ewe fao ful gla gle glv guj hau hin hun hye ibo iii ina isl kal kan kat kaz khm kik kin kir kor lao lav lin lit lub lug mal mar mkd mlg mlt mon mya nbl nde nep nld oci ori orm pan pus roh ron run rus sag sin slk sme sna som sot srp ssw swa tam tel tgk tir ton tsn tso tur ukr urd uzb ven vie xho yor zul ang arz ast chr fry fur grc hat hbs ido kur lat ltg ltz mri nan nav rup san scn srd tat tgl tuk vol yid yue".split():
    if l3 in known:  ### already in
        continue 

    # Look the code up by its part3 key; an unknown code raises here.
    l = languages.get(part3=l3)
    
    if l.part1:  ### use the two letter code if it exists
        bcp47 = l.part1
    else:
        bcp47 = l3

    
    # INSERT LANG DATA (CODES AND NAMES)
    u = 'omw'
    c.execute("""INSERT INTO lang (bcp47, iso639, u)
                  VALUES (?,?,?)""", (bcp47,l3,u))

    # Fetch the largest id in `lang`, presumably the row just inserted.
    # NOTE(review): lang_id is not used and nothing is committed in the
    # visible part of the script - confirm both happen further down.
    c.execute("""SELECT MAX(id) FROM lang""")
    lang_id = c.fetchone()[0]
Example #47
0
 def setUpClass(cls):
     # Cache the language objects shared by the tests, looked up by name.
     fixtures = (
         ('english', 'English'),
         ('chinese', 'Chinese'),
         ('arabic', 'Arabic'),
         ('moroccan', 'Moroccan Arabic'),
         ('tzeltal', 'Tzeltal'),
     )
     for attribute, language_name in fixtures:
         setattr(cls, attribute, languages.get(name=language_name))
Example #48
0
 def test_part3(self):
     # 'eng' must resolve to English through get() and through the part3 index.
     via_get = languages.get(part3='eng')
     self.assertEqual(via_get.name, 'English')
     via_index = languages.part3['eng']
     self.assertEqual(via_index.name, 'English')
Example #49
0
 def test_inverted(self):
     # The inverted form 'Arabic, Moroccan' must resolve to 'Moroccan Arabic'
     # through get() and through the inverted index.
     via_get = languages.get(inverted='Arabic, Moroccan')
     self.assertEqual(via_get.name, 'Moroccan Arabic')
     via_index = languages.inverted['Arabic, Moroccan']
     self.assertEqual(via_index.name, 'Moroccan Arabic')
Example #50
0
 def test_collective_name(self):
     # Collective names must expose the same code as part2b and as part5.
     for attribute in ('part2b', 'part5'):
         bihari = languages.get(name='Bihari languages')
         self.assertEqual(getattr(bihari, attribute), 'bih')
         sami = languages.get(name='Sami languages')
         self.assertEqual(getattr(sami, attribute), 'smi')
Example #51
0
 def test_macro_name(self):
     # TODO: self.assertEqual(languages.get(name='Standard Estonian').macro, languages.get(alpha3='est'))
     # For now the macro attribute is compared against the plain code string.
     standard_estonian = languages.get(name='Standard Estonian')
     self.assertEqual(standard_estonian.macro, 'est')
Example #52
0
 def test_part2b(self):
     # 'dut' must resolve to Dutch through get() and through the part2b index.
     via_get = languages.get(part2b='dut')
     self.assertEqual(via_get.name, 'Dutch')
     via_index = languages.part2b['dut']
     self.assertEqual(via_index.name, 'Dutch')
Example #53
0
 def test_name(self):
     # Name lookups must work through get() and through the name index,
     # including a name containing a non-ASCII character.
     via_get = languages.get(name='English')
     self.assertEqual(via_get.part3, 'eng')
     via_index = languages.name['English']
     self.assertEqual(via_index.part3, 'eng')
     sanapana = languages.get(name='Sanapaná')
     assert sanapana.part3 == 'spn'
Example #54
0
 def test_retired_code(self):
     # TODO: self.assertEqual(languages.get(alpha3='ron').retired, 'mol')

     # 'mol': item 1 of the retired entry must contain the 'ron' language.
     assert languages.get(part3='ron') in languages.get(retired='mol')[1]
     assert languages.part3['ron'] in languages.retired['mol'][1]

     # 'ppr': item 2 is a str and item 0 a datetime, via get() and the index.
     for position, expected_type in ((2, str), (0, datetime)):
         assert isinstance(languages.get(retired='ppr')[position], expected_type)
         assert isinstance(languages.retired['ppr'][position], expected_type)

     # 'ppr': item 1 is an empty list.
     assert languages.get(retired='ppr')[1] == []
     assert languages.retired['ppr'][1] == []

     # 'sh' maps directly onto the 'hbs' language object.
     assert languages.get(retired='sh') is languages.get(part3='hbs')
     assert languages.retired['sh'] is languages.part3['hbs']

     # 'ccy': item 1 lists these five languages in this order.
     ccy_successors = languages.get(retired='ccy')[1]
     expected = [languages.get(part3=code)
                 for code in ('zhn', 'zyg', 'zyn', 'zzj', 'zhd')]
     assert ccy_successors == expected
Example #55
0
 def test_part1(self):
     # 'en' must resolve to English through get() and through the part1 index.
     via_get = languages.get(part1='en')
     self.assertEqual(via_get.name, 'English')
     via_index = languages.part1['en']
     self.assertEqual(via_index.name, 'English')
Example #56
0
 def test_part2t(self):
     # 'nld' must resolve to Dutch through get() and through the part2t index.
     via_get = languages.get(part2t='nld')
     self.assertEqual(via_get.name, 'Dutch')
     via_index = languages.part2t['nld']
     self.assertEqual(via_index.name, 'Dutch')