def langcode_to_tmxcode(langcode: str, default: str = "en-US") -> str:
    """Convert a langcode str to a TMX code (langid-LOCALE).

    FIX: the default used to be "en_US" (underscore), which is inconsistent
    with the hyphenated TMX codes this function returns and with the fallback
    announced in the warning message ("en-US").

    >>> langcode_to_tmxcode("zh")
    'zh-CN'
    >>> langcode_to_tmxcode("zh-CHS")
    'zh-CN'
    >>> langcode_to_tmxcode("zh-CHT")
    'zh-TW'
    >>> langcode_to_tmxcode("en")
    'en-US'
    >>> langcode_to_tmxcode("en-uk")
    'en-GB'
    >>> langcode_to_tmxcode("de")
    'de-DE'
    >>> langcode_to_tmxcode("en-ca")
    'en-CA'
    >>> langcode_to_tmxcode("pt")
    'pt-PT'
    """
    # Special-case tags that closest_match would not map the way we want.
    if langcode.lower() == "zh-cht":
        langcode = "zh-tw"
    if langcode.lower() == "pt":
        langcode = "pt-pt"
    lc_ = standardize_tag(langcode)
    try:
        tmxcode = closest_match(lc_, TMX_CODES)[0]
    except Exception as exc:  # best-effort lookup: fall back to the default
        logger.warning(" exc: %s, returning en-US", exc)
        tmxcode = default
    return tmxcode
def get_country_data(self):
    """Populate self.countries_data and self.country_codes from the country list.

    Builds one entry per country (keyed by lower-cased name) and registers
    both the alpha-2 and alpha-3 codes in self.country_codes.
    """
    for country in self.get_all_countries():
        name = country["name"].lower()
        entry = {
            "timezones": country["timezones"],
            "demonym": country["demonym"],
            "currencies": country["currencies"],
            "alpha2Code": country["alpha2Code"],
            "alpha3Code": country["alpha3Code"],
            "area": str(country["area"]),
            "languages": [
                langcodes.LanguageData(language=code).language_name()
                for code in country["languages"]
            ],
            "lang_codes": [
                langcodes.standardize_tag(code)
                for code in country["languages"]
            ],
            "capital": country["capital"],
            "borders": country["borders"],
            "nativeName": country["nativeName"],
            "population": str(country["population"]),
            "region": country["region"],
            "subregion": country["subregion"],
        }
        # Latitude/longitude only when the source provides a non-empty pair.
        if len(country["latlng"]):
            entry["lat"], entry["long"] = country["latlng"]
        self.countries_data[name] = entry
        # Reverse lookups: ISO code -> country name.
        self.country_codes[country["alpha2Code"]] = name
        self.country_codes[country["alpha3Code"]] = name
def language(layout: LayoutDetails) -> str:
    """Return the BCP 47 tag for *layout*.

    See https://tools.ietf.org/html/bcp47. Raises ValueError when the
    layout declares no ISO 639 language.
    """
    if not layout.iso639:
        raise ValueError(f"no language for {layout.xkb_name()}")
    parts = [layout.iso639[0]]
    if layout.iso3166:
        # Append the region subtag when a country is known.
        parts.append(layout.iso3166[0])
    return langcodes.standardize_tag("-".join(parts))
def _deserialize(self, value: str, attr, data, **kwargs):
    """Normalize *value* to its canonical BCP 47 form.

    Overlong tags are replaced by their shortest version (macro=True) and
    formatted according to BCP 47 conventions.
    TODO: does not validate that the input is a well-formed language tag.
    """
    try:
        normalized = standardize_tag(value, macro=True)
    except ValueError as err:
        raise ValidationError(f'{err}')
    return normalized
def transform(self, data):
    """Copy each dc:subject into a 'keyword_<lang>' field on the output XML.

    The language comes from the item's xml:lang attribute, truncated to its
    primary subtag and normalized via BCP 47.
    """
    raw, xml = data
    root = xml.find('.')
    for item in raw.findall(".//dc:subject", namespaces=ns):
        lang = item.get('{http://www.w3.org/XML/1998/namespace}lang')
        field = ET.Element('field')
        field.set('name', 'keyword_{}'.format(standardize_tag(lang[0:2])))
        field.text = item.text
        root.append(field)
    return data
def transform(self, data):
    """Add one 'la' field per dc:language, with the tag normalized to BCP 47."""
    raw, xml = data
    target = xml.find('.')
    for lang in raw.findall(".//dc:language", namespaces=ns):
        field = ET.Element('field')
        field.set('name', 'la')
        field.text = standardize_tag(lang.text)
        target.append(field)
    return data
def transform(self, data):
    """For every http(s) dc:identifier, emit a 'fulltext_html_<lang>' field
    per dc:language found in the record."""
    raw, xml = data
    root = xml.find('.')
    for url in raw.findall(".//dc:identifier", namespaces=ns):
        # Only identifiers that are URLs point at full text.
        if not url.text.startswith('http'):
            continue
        for lang in raw.findall(".//dc:language", namespaces=ns):
            field = ET.Element('field')
            field.text = url.text
            field.set('name', 'fulltext_html_%s' % standardize_tag(lang.text))
            root.append(field)
    return data
def __init__(self, width: int, height: int, storage: Storage = None,
             style: str = 'newsworthy', language: str = 'en-GB'):
    """
    :param width: width in pixels
    :param height: height in pixels
    :param storage: storage object that will handle file saving. Defaults
                    to a LocalStorage() instance, which will save a file
                    in the working dir.
    :param style: a predefined style or the path to a custom style file
    :param language: a BCP 47 language tag (eg `en`, `sv-FI`)
    """
    # FIX: `storage` used to default to LocalStorage(), a mutable default
    # argument evaluated once at definition time and shared by every
    # instance. Create a fresh instance per call instead.
    if storage is None:
        storage = LocalStorage()

    # Fail fast: every Datawrapper API call needs the token.
    try:
        self.api_token = os.environ["DATAWRAPPER_API_KEY"]
    except KeyError:
        raise Exception("DATAWRAPPER_API_KEY must be set in environment")

    # P U B L I C   P R O P E R T I E S
    # The user can alter these at any time

    # Unlike regular Chart objects Datawrapper does not use the DataList
    # class for storing data, as DataList does not handle non-numerical
    # data. DatawrapperCharts will understand the same list-of-list-of-list
    # structure as DataList builds upon, but prefer consuming list of
    # dictionaries
    self.data = []
    self.labels = []  # Optionally one label for each dataset
    self.annotations = []  # Manually added annotations
    self.caption = None
    self.highlight = None
    self.decimals = None
    self.dw_data = {}  # The DW data structure that defines the chart
    self._dw_id = None  # Datawrapper chart id

    # P R I V A T E   P R O P E R T I E S
    # Properties managed through getters/setters
    self._title = None
    self._units = "count"

    # Calculated properties
    self._storage = storage
    self._w, self._h = int(width), int(height)
    self._style = loadstyle(style)
    # Standardize and check if language tag is a valid BCP 47 tag
    self._language = standardize_tag(language)
    # Babel locales use underscores ("en_US") rather than BCP 47 hyphens
    self._locale = Locale.parse(self._language.replace("-", "_"))

    # For renaming regions to DW conventions
    self._translations = None
def transform(self, data):
    """Copy each dc:description into an 'ab_<lang>' field.

    The xml:lang attribute is reduced to its primary subtag (text before
    the first hyphen) and normalized via BCP 47.
    """
    raw, xml = data
    root = xml.find('.')
    for item in raw.findall(".//dc:description", namespaces=ns):
        lang = item.get('{http://www.w3.org/XML/1998/namespace}lang')
        if "-" in lang:
            lang, _, _ = lang.partition("-")
        field = ET.Element('field')
        field.text = item.text
        field.set('name', 'ab_{}'.format(standardize_tag(lang)))
        root.append(field)
    return data
def append_input_component(details: LayoutDetails):
    """Append a Chrome OS input-component entry for *details* to the manifest."""
    component: dict[str, object] = {
        'name': f"XKB's {details.xkb_name()} -- {details.description}",
        'id': "all-xkb-layouts-" + details.xkb_name(),
    }
    if details.iso639:
        # Multiple language tags are allowed per component.
        component['language'] = [
            langcodes.standardize_tag(code) for code in details.iso639
        ]
    else:
        # A placeholder is required: without any language tag, switching to
        # the layout crashes Chrome OS.
        component['language'] = ['??']
    component['layouts'] = [details.xkb_name()]  # list of one
    manifest['input_components'].append(component)
def process_recording(recording):
    """Map a raw recording dict onto the downstream output schema.

    The language tag is normalized to BCP 47 conventions.
    """
    return {
        "dateAdded": recording["updated_at"],
        "videos": [{
            "url": recording["recording_url"],
            "quality": "HD",
            "videoType": "MP4",
        }],
        "duration": recording["length"],
        "language": langcodes.standardize_tag(recording["language"]),
    }
def transform(self, data):
    """Emit one 'available_languages' field per distinct primary language
    found among the dc:description xml:lang attributes."""
    raw, xml = data
    root = xml.find('.')
    seen = set()
    for item in raw.findall(".//dc:description", namespaces=ns):
        lang = item.get('{http://www.w3.org/XML/1998/namespace}lang')
        if "-" in lang:
            lang = lang.split("-")[0]
        seen.add(standardize_tag(lang))
    for language in seen:
        field = ET.Element('field')
        field.set('name', 'available_languages')
        field.text = language
        root.append(field)
    return data
def parse_nquads_line(line):
    """
    Parse a line in N-Triples or N-Quads format, returning four dictionaries:
    (subj, pred, obj, graph).

    Each of the dictionaries contains fields that may or may not be present,
    indicating their parsed content:

    - 'url': a complete URL indicating a resource. (Pedants: It's an IRI, but
      it's also a URL.)
    - 'text': a string value.
    - 'lang': the language code associated with the given 'text'.
    - 'type': a URL pointing to something in the 'xsd:' namespace, indicating
      for how to interpret the given 'text' as a value.
    - 'blank': the arbitrary ID of a blank node.
    """
    group_names = ('url', 'text', 'lang', 'type', 'blank', 'comment')
    items = []
    for match in NQUADS_ITEM_RE.finditer(line):
        item = {name: match.group(name) for name in group_names
                if match.group(name) is not None}
        # Comments are not part of the statement; drop them entirely.
        if 'comment' in item:
            continue
        # Decode each field into its usable form.
        if 'url' in item:
            item['url'] = decode_url(item['url'])
        if 'lang' in item:
            item['lang'] = langcodes.standardize_tag(item['lang'])
        if 'type' in item:
            item['type'] = decode_url(item['type'])
        if 'text' in item:
            item['text'] = decode_escapes(item['text'])
        if item:
            items.append(item)

    if len(items) == 3:
        # A triple has no graph component; pad so callers always get four.
        items.append({})
    # The line is either empty aside from comments, or contains a quad
    assert len(items) == 0 or len(items) == 4, line
    return items
def parse_nquads_line(line):
    """
    Parse a line in N-Triples or N-Quads format, returning four dictionaries:
    (subj, pred, obj, graph).

    Each of the dictionaries contains fields that may or may not be present,
    indicating their parsed content:

    - 'url': a complete URL indicating a resource. (Pedants: It's an IRI, but
      it's also a URL.)
    - 'text': a string value.
    - 'lang': the language code associated with the given 'text'.
    - 'type': a URL pointing to something in the 'xsd:' namespace, indicating
      for how to interpret the given 'text' as a value.
    - 'blank': the arbitrary ID of a blank node.
    """
    items = []
    for match in NQUADS_ITEM_RE.finditer(line):
        item = {}
        # Collect only the named regex groups that matched this item.
        for group in ['url', 'text', 'lang', 'type', 'blank', 'comment']:
            matched = match.group(group)
            if matched is not None:
                item[group] = matched
        # Comments are not part of the statement; skip them entirely.
        if 'comment' in item:
            continue
        # Decode each matched field into its usable form.
        if 'url' in item:
            item['url'] = decode_url(item['url'])
        if 'lang' in item:
            # Normalize the language tag to BCP 47 conventions.
            item['lang'] = langcodes.standardize_tag(item['lang'])
        if 'type' in item:
            item['type'] = decode_url(item['type'])
        if 'text' in item:
            item['text'] = decode_escapes(item['text'])
        if item:
            items.append(item)
    if len(items) == 3:
        # A triple has no graph component; pad so callers always get four.
        items.append({})
    # The line is either empty aside from comments, or contains a quad
    assert len(items) == 0 or len(items) == 4, line
    return items
def validate_aovp_args(args):  # pylint: disable=too-many-branches, too-many-return-statements, too-many-statements
    """
    Check that the commandline arguments passed to autosub are valid
    for audio or video processing.
    """
    # Basic numeric sanity checks first.
    if args.sleep_seconds < 0 or args.lines_per_trans < 0:
        raise exceptions.AutosubException(
            _("Error: \"-slp\"/\"--sleep-seconds\" arg is illegal."))

    if args.speech_language:  # pylint: disable=too-many-nested-blocks
        # --- Speech-to-text language validation ---
        if not args.gspeechv2:
            args.speech_language = args.speech_language.lower()
            if args.speech_language \
                    not in constants.SPEECH_TO_TEXT_LANGUAGE_CODES:
                print(
                    _("Warning: Speech language \"{src}\" not recommended. "
                      "Run with \"-lsc\"/\"--list-speech-codes\" "
                      "to see all supported languages.").format(
                          src=args.speech_language))
                # With best-match 's' enabled, try to substitute the closest
                # supported speech code instead of just warning.
                if args.best_match and 's' in args.best_match:
                    best_result = lang_code_utils.match_print(
                        dsr_lang=args.speech_language,
                        match_list=list(
                            constants.SPEECH_TO_TEXT_LANGUAGE_CODES.keys()),
                        min_score=args.min_score)
                    if best_result:
                        print(
                            _("Use langcodes-py2 to standardize the result."))
                        args.speech_language = langcodes.standardize_tag(
                            best_result[0])
                        print(
                            _("Use \"{lang_code}\" instead.").format(
                                lang_code=args.speech_language))
                    else:
                        print(
                            _("Match failed. Still using \"{lang_code}\".").
                            format(lang_code=args.speech_language))

        if args.min_confidence < 0.0 or args.min_confidence > 1.0:
            raise exceptions.AutosubException(
                _("Error: The arg of \"-mnc\"/\"--min-confidence\" isn't legal."
                  ))

        if args.dst_language is None:
            # No translation requested: recognition only.
            print(
                _("Destination language not provided. "
                  "Only performing speech recognition."))
        else:
            # --- Translation language validation ---
            if not args.src_language:
                # Fall back to the speech language as translation source and
                # force best-match for it.
                print(
                    _("Source language not provided. "
                      "Use Speech language instead."))
                args.src_language = args.speech_language
                if not args.best_match:
                    args.best_match = {'src'}
                elif 'src' not in args.best_match:
                    args.best_match.add('src')

            # Case-insensitive match of src/dst against the googletrans
            # language table, canonicalizing to the table's casing.
            is_src_matched = False
            is_dst_matched = False
            for key in googletrans.constants.LANGUAGES:
                if args.src_language.lower() == key.lower():
                    args.src_language = key
                    is_src_matched = True
                if args.dst_language.lower() == key.lower():
                    args.dst_language = key
                    is_dst_matched = True

            if not is_src_matched:
                if not args.gtransv2:
                    if args.best_match and 'src' in args.best_match:
                        print(
                            _("Warning: Source language \"{src}\" not supported. "
                              "Run with \"-lsc\"/\"--list-translation-codes\" "
                              "to see all supported languages.").format(
                                  src=args.src_language))
                        best_result = lang_code_utils.match_print(
                            dsr_lang=args.src_language,
                            match_list=list(
                                googletrans.constants.LANGUAGES.keys()),
                            min_score=args.min_score)
                        if best_result:
                            print(
                                _("Use \"{lang_code}\" instead.").format(
                                    lang_code=best_result[0]))
                            args.src_language = best_result[0]
                        else:
                            raise exceptions.AutosubException(
                                _("Match failed. Still using \"{lang_code}\". "
                                  "Program stopped.").format(
                                      lang_code=args.src_language))
                    else:
                        raise exceptions.AutosubException(
                            _("Error: Source language \"{src}\" not supported. "
                              "Run with \"-lsc\"/\"--list-translation-codes\" "
                              "to see all supported languages. "
                              "Or use \"-bm\"/\"--best-match\" to get a best match."
                              ).format(src=args.src_language))

            if not is_dst_matched:
                if not args.gtransv2:
                    if args.best_match and 'd' in args.best_match:
                        print(
                            _("Warning: Destination language \"{dst}\" not supported. "
                              "Run with \"-lsc\"/\"--list-translation-codes\" "
                              "to see all supported languages.").format(
                                  dst=args.dst_language))
                        best_result = lang_code_utils.match_print(
                            dsr_lang=args.dst_language,
                            match_list=list(
                                googletrans.constants.LANGUAGES.keys()),
                            min_score=args.min_score)
                        if best_result:
                            print(
                                _("Use \"{lang_code}\" instead.").format(
                                    lang_code=best_result[0]))
                            args.dst_language = best_result[0]
                        else:
                            raise exceptions.AutosubException(
                                _("Match failed. Still using \"{lang_code}\". "
                                  "Program stopped.").format(
                                      lang_code=args.dst_language))
                    else:
                        raise exceptions.AutosubException(
                            _("Error: Destination language \"{dst}\" not supported. "
                              "Run with \"-lsc\"/\"--list-translation-codes\" "
                              "to see all supported languages. "
                              "Or use \"-bm\"/\"--best-match\" to get a best match."
                              ).format(dst=args.dst_language))

            # Translating a language into itself is pointless: disable
            # translation and keep recognition only.
            if args.dst_language == args.speech_language \
                    or args.src_language == args.dst_language:
                print(
                    _("Speech language is the same as the Destination language. "
                      "Only performing speech recognition."))
                args.dst_language = None
                args.src_language = None
    else:
        # No speech language: either reuse externally supplied regions or
        # fall back to region detection only.
        if args.ext_regions:
            if not args.keep:
                raise exceptions.AutosubException(
                    _("You've already input times. "
                      "No works done."))
        else:
            print(
                _("Speech language not provided. "
                  "Only performing speech regions detection."))

    if args.styles == ' ':
        # when args.styles is used but without option
        # its value is ' '
        if not args.ext_regions:
            raise exceptions.AutosubException(
                _("Error: External speech regions file not provided."))
        else:
            args.styles = args.ext_regions
def _get_language_code(self, heading):
    """Return the standardized language tag parsed from *heading*, or None
    when JA_LANGUAGE_RE does not match."""
    match = JA_LANGUAGE_RE.match(heading)
    return langcodes.standardize_tag(match.group(1)) if match else None
def __init__(self, width: int, height: int, storage: Storage = None,
             style: str = 'newsworthy', language: str = 'en-GB'):
    """
    :param width: width in pixels
    :param height: height in pixels
    :param storage: storage object that will handle file saving. Defaults
                    to a LocalStorage() instance, which will save a file
                    in the working dir.
    :param style: a predefined style or the path to a custom style file
    :param language: a BCP 47 language tag (eg `en`, `sv-FI`)
    """
    # FIX: `storage` used to default to LocalStorage(), a mutable default
    # argument evaluated once at definition time and shared by every
    # instance. Create a fresh instance per call instead.
    if storage is None:
        storage = LocalStorage()

    # P U B L I C   P R O P E R T I E S
    # The user can alter these at any time
    self.data = DataList()  # A list of datasets
    self.annotate_trend = True  # Print out values at points on trendline?
    self.trendline = []  # List of x positions, or data points
    self.labels = []  # Optionally one label for each dataset
    self.annotations = []  # Manually added annotations
    self.interval = None  # yearly|quarterly|monthly|weekly|daily
    # We will try to guess interval based on the data,
    # but explicitly providing a value is safer. Used for finetuning.
    self.show_ticks = True  # toggle category names, dates, etc
    self.subtitle = None
    self.note = None
    self.xlabel = None
    self.ylabel = None
    self.caption = None
    self.highlight = None
    # number of decimals to show in annotations, value ticks, etc
    # None means automatically chose the best number
    self.decimals = None
    # Path to image that will be embedded in the caption area
    # Can also be set though a style property
    self.logo = None
    # Custom coloring function
    self.color_fn = None

    # P R I V A T E   P R O P E R T I E S
    # Properties managed through getters/setters
    self._title = None
    self._units = "count"

    # Calculated properties
    self._annotations = []  # Automatically added annotations
    self._storage = storage
    self._w, self._h = int(width), int(height)
    self._style = loadstyle(style)
    # Standardize and check if language tag is a valid BCP 47 tag
    self._language = standardize_tag(language)
    # Babel locales use underscores ("en_US") rather than BCP 47 hyphens
    self._locale = Locale.parse(self._language.replace("-", "_"))

    # Dynamic typography
    self._title_font = FontProperties()
    self._title_font.set_family(self._style["title_font"])
    self._title_font.set_size(self._style["figure.titlesize"])
    self._title_font.set_weight(self._style["figure.titleweight"])

    self._fig = Figure()
    FigureCanvas(self._fig)
    self.ax = self._fig.add_subplot(111)
    # self._fig, self.ax = plt.subplots()
    self.value_axis = self.ax.yaxis
    self.category_axis = self.ax.xaxis

    # Calculate size in inches
    self._set_size(width, height)

    # Chart elements. Made available for fitting.
    self._title_elem = None
    self._subtitle_elem = None
    self._note_elem = None
    self._caption_elem = None
    self._logo_elem = None
def CQS_match_query_phrase(self, phrase):
    """Analyze phrase to see if it is a play-able phrase with this skill.

    Needs to be implemented by the skill.

    Arguments:
        phrase (str): User phrase, "What is an aardwark"

    Returns:
        (match, CQSMatchLevel[, callback_data]) or None: Tuple containing
            a string with the appropriate matching phrase, the PlayMatch
            type, and optionally data to return in the callback if the
            match is selected.
    """
    response = None
    # Run the padatious-style intent parser over the raw phrase.
    match = self.intents.calc_intent(phrase)
    level = CQSMatchLevel.CATEGORY
    data = match.matches
    intent = match.name
    score = match.conf
    data["intent"] = intent
    data["score"] = score
    # Map the parser confidence onto CommonQuery match levels; below 0.3
    # the intent is discarded entirely.
    if score > 0.8:
        level = CQSMatchLevel.EXACT
    elif score > 0.5:
        level = CQSMatchLevel.CATEGORY
    elif score > 0.3:
        level = CQSMatchLevel.GENERAL
    else:
        intent = None
    if intent:
        # Validate extracted entities
        country = data.get("country")
        region = data.get("region")
        language = data.get("language")
        if country:
            data["query"] = country
            # ensure we really have a country name
            # Fallback response if no better answer is produced below.
            response = self.dialog_renderer.render("bad_country", {})
            # NOTE: `match`/`score` are reused here, clobbering the intent
            # match object — safe because its fields were copied above.
            match, score = match_one(country.lower(),
                                     list(self.countries_data.keys()))
            self.log.debug("Country fuzzy match: {n}, Score: {s}".format(
                n=match, s=score))
            if score > 0.5:
                country = match
                data.update(self.countries_data[country])
            else:
                # Fuzzy match too weak: try a direct search instead.
                countries = self.search_country(country)
                if not len(countries) > 0:
                    level = CQSMatchLevel.GENERAL
                else:
                    country = countries[0]["name"]
                    data.update(countries[0])
                    # TODO disambiguation
                    if len(countries) > 1:
                        data["disambiguation"] = countries[1:]
                        self.log.debug("multiple matches found: " +
                                       str([c["name"] for c in countries]))
            data["country"] = country  # normalized from match
        if language:
            data["query"] = language
            # ensure we really have a language name
            words = language.split(" ")
            # remove words commonly caught by mistake in padatious
            clean_up = ["is"]
            language = " ".join(
                [word for word in words if word not in clean_up])
            lang_code = langcodes.find_name(
                'language', language,
                langcodes.standardize_tag(self.lang))
            lang_code = str(lang_code)
            self.log.debug("Detected lang code: " + lang_code)
            # NOTE(review): after str(), lang_code looks always truthy, so
            # this guard may never trigger — verify find_name's failure mode.
            if not lang_code:
                return None
            data["lang_code"] = lang_code
            # TODO
            countries = self.search_country_by_language(lang_code)
            data["country_list"] = countries
        if region:
            data["query"] = region
            # ensure we really have a region name
            response = self.dialog_renderer.render("bad_region")
            countries = None
            # Try region first, then subregion; keep whichever scores higher.
            match, score = match_one(region, self.regions)
            data["region_score"] = score
            if score > 0.5:
                region = match
                countries = self.search_country_by_region(region)
            match, score2 = match_one(region, self.subregions)
            data["subregion_score"] = score2
            if score2 > score:
                region = match
                countries = self.search_country_by_subregion(region)
            # Only let the region score drive the level when no country
            # entity was matched.
            if score > 0.8 and not country:
                level = CQSMatchLevel.EXACT
            elif score > 0.5 and not country:
                level = CQSMatchLevel.CATEGORY
            elif score > 0.3 and not country:
                level = CQSMatchLevel.GENERAL
            data["region"] = region
            self.log.debug("Detected region: " + region)
            data["country_list"] = countries
        # Get response from intents
        response = self.intent2answer(intent, data) or response
        if response:
            return (phrase, level, response, data)
    return None
def parse_track(self, item):
    """Parse one track declaration of the form: filename (kind language) "label".

    Builds an options dict with 'kind', 'language', 'label' and 'src' keys,
    guessing defaults for anything missing, and returns a track node. Any
    parse problem sets `error` and is reported (with the guessed values)
    via the docutils reporter rather than raised.
    """
    options = {}
    error = False
    original = item  # preserve for error messages
    # Normalize line breaks so the regexes below see a single line.
    item = item.replace('\r', ' ').replace('\n', ' ')
    try:
        # Split out the parenthesized "(kind language)" group; re.split with
        # capturing groups also yields the separator matches.
        head, _emptyStr, lang_kind, _emptyStr, tail = re.split(
            r"(^| )\((.*?)\)( |$)", item)
        lang_kind = lang_kind.split()  # split input into a list of words
        kinds = set(lang_kind) & set(('captions', 'descriptions', 'chapters',
                                      'metadata', 'subtitles'))
        # Find kind: exactly one kind word is allowed; extras flag an error.
        for kind in kinds:
            if 'kind' not in options:
                options['kind'] = kind
            else:
                error = True
                continue
            lang_kind.remove(kind)
        # Find language: remaining words must resolve to one language.
        for lang in lang_kind:
            if 'language' not in options:
                if langcodes.code_to_names(
                        'language',
                        langcodes.get(
                            langcodes.standardize_tag(lang)).language):
                    options['language'] = langcodes.standardize_tag(lang)
                else:
                    # lang is not a lang code. Try interpreting as a language name
                    try:
                        options['language'] = str(langcodes.find(lang))
                    except:
                        error = True
                        continue
            else:
                error = True
                continue
        # Drop the parsed "(...)" group from the remaining text.
        item = head + ' ' + tail
    except:
        # No parenthesized group found (or malformed): fall through to defaults.
        error = True
    if 'kind' not in options:
        options['kind'] = 'subtitles'
    if 'language' not in options:
        # Default to the system locale's language; 'en' as last resort.
        try:
            options['language'] = langcodes.standardize_tag(getlocale()[0])
        except:
            options['language'] = 'en'
    # find label
    try:
        head, _emptyStr, _quote, label, _emptyStr, tail = re.split(
            r"""(^| )(["'])(.*?)\2( |$)""", item)
        # Text both before and after the quoted label is ambiguous input.
        if head and tail:
            error = True
        item = head + tail
        options['label'] = label.strip()
    except:
        # No quoted label: auto-generate one like "Subtitles in English".
        try:
            options['label'] = options['kind'].capitalize(
            ) + ' in ' + langcodes.get(
                options['language']).autonym().capitalize()
        except:
            error = True
            options['label'] = None
    # get filename
    options['src'] = self.uri_check(item)
    # return error
    if error:
        self.state_machine.reporter.error(
            'Error in "%s" directive: \n Problems encountered parsing track "%s" \n\n'
            'Guessing the following values: \n'
            'filename: "%s" \n'
            'kind: "%s" \n'
            'language: "%s" \n'
            'label: "%s" \n\n'
            'Track kinds should be chosen from one of the following: \n'
            'captions, descriptions, chapters, metadata, subtitles \n'
            'Track languages should be given as BCP 47 compliant language codes. \n'
            'Track declarations should take the following form: \n'
            'filename (kind language_code) "label"\n'
            'Tracks must have one filename and one language_code. \n'
            'If a kind is not specified, "subtitles" will be assumed. \n'
            'If a label is not provided, it will be auto-generated from the kind and language specified.'
            % (self.name, original, options['src'], options['kind'],
               options['language'], options['label']),
            nodes.literal_block(self.block_text, self.block_text),
            line=self.lineno)
    track_node = track(self.block_text, **options)
    return track_node
def is_valid_lang_code(lang_code):
    """Validate *lang_code*.

    Returns True when the tag parses; otherwise returns a human-readable
    error message. Note the message is a truthy str, so callers must
    compare against True rather than rely on truthiness.
    """
    try:
        standardize_tag(lang_code)
        return True
    except LanguageTagError:
        return '{} is not a valid language code.'.format(lang_code)
def _get_language_code(self, heading):
    """Extract a language code from a section heading.

    Matches *heading* against JA_LANGUAGE_RE; on success the first capture
    group is normalized with langcodes.standardize_tag and returned.
    Returns None when the heading does not match.
    """
    match = JA_LANGUAGE_RE.match(heading)
    if match:
        return langcodes.standardize_tag(match.group(1))
    else:
        return None