Ejemplo n.º 1
0
def validate_datatypes(bag):
    """Validate the date and language values stored in a bag's bag-info.txt.

    Assumes ``bag`` is a valid bag whose ``bag-info.txt`` can be read by
    ``get_fields_from_file``.

    Args:
        bag: The bag to check. ``str(bag)`` is used as the directory that
            contains ``bag-info.txt``.

    Returns:
        True if every 'Date_Start', 'Date_End' and 'Bagging_Date' value is
        accepted by ``iso8601.parse_date`` and every 'Language' value is
        accepted by ``languages.lookup``. False as soon as one value is
        rejected; the offending value is printed first.
    """
    bag_dates_to_validate = ('Date_Start', 'Date_End', 'Bagging_Date')
    bag_info_data = get_fields_from_file(join(str(bag), 'bag-info.txt'))

    dates = []
    langz = []
    # .items() (rather than .iteritems()) works on both Python 2 and 3.
    for key, value in bag_info_data.items():
        if key in bag_dates_to_validate:
            dates.append(value)
        if key == 'Language':
            langz.append(value)

    # Dates are checked before languages, so a bad date is reported first.
    for date in dates:
        try:
            iso8601.parse_date(date)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are no longer reported as an invalid date.
            print("invalid date: '{}'".format(date))
            return False

    for language in langz:
        try:
            languages.lookup(language)
        except Exception:
            print("invalid language code: '{}'".format(language))
            return False

    return True
Ejemplo n.º 2
0
 def get(cls, language):
     """Build a ``Language`` from a language code.

     When ``PYCOUNTRY`` is truthy the code is resolved with
     ``languages.lookup``. Otherwise the keyword-based ``languages.get``
     API is used: two-character codes are looked up as ``alpha2``, and
     three-character codes are tried as ``part2b``, ``part2t`` and
     ``part3`` in that order. Codes of any other length are rejected.

     Raises:
         LookupError: if the code cannot be resolved.
     """
     try:
         if PYCOUNTRY:
             found = languages.lookup(language)
             return Language(found.alpha_2, found.alpha_3, found.name,
                             getattr(found, "bibliographic", None))

         code_length = len(language)
         if code_length == 2:
             found = languages.get(alpha2=language)
         elif code_length == 3:
             found = None
             # The first code type that resolves wins.
             for code_type in ('part2b', 'part2t', 'part3'):
                 try:
                     found = languages.get(**{code_type: language})
                 except KeyError:
                     continue
                 break
             if not found:
                 raise KeyError(language)
         else:
             raise KeyError(language)

         return Language(found.alpha2, found.part3, found.name,
                         found.part2b or found.part2t)
     except (LookupError, KeyError):
         # Both lookup APIs are normalised to one error type and message.
         raise LookupError("Invalid language code: {0}".format(language))
Ejemplo n.º 3
0
    def __guess_language(self, tokens):
        """Guess the language of ``tokens`` and return its lower-cased name.

        Tokens are filtered first: only those matching the word pattern
        (optional leading '#', then at least two non-digit word characters)
        and matching neither ``self.RESERVED_WORDS_PATTERN`` nor
        ``self.URL_PATTERN`` are passed to langdetect.

        Args:
            tokens: Iterable of token strings.

        Returns:
            The lower-cased pycountry name of the detected language,
            'english' if the optional dependencies are missing or detection
            fails, or 'none' if no token survives the filter.
        """
        try:
            from langdetect import detect
            # Import the exception class itself: the module name `langdetect`
            # is not bound by `from langdetect import detect`, so referring
            # to it in the except clause below would raise NameError.
            from langdetect.lang_detect_exception import LangDetectException
            from pycountry import languages
        except ImportError:
            print(
                "The langdetect module is required for automated language detection; install with pip install langdetect"
            )
            print("Reverting to english")
            return 'english'

        # Do language detection using langdetect
        # and map to full language name using pycountry
        words = [
            w for w in tokens
            if regex.match(r'#?[^\W\d]{2,}$', w)
            and not self.RESERVED_WORDS_PATTERN.match(w)
            and not self.URL_PATTERN.match(w)
        ]

        if not words:
            return 'none'

        text = ' '.join(words)
        try:
            language_short = detect(text)
            # Only the part before '-' is looked up, so region-qualified
            # codes such as 'zh-cn' resolve by their language prefix.
            return languages.lookup(language_short.split('-')[0]).name.lower()
        except LangDetectException:
            print('Language detection failed on string: "' + text +
                  '", defaulting to English')
            return 'english'
Ejemplo n.º 4
0
def translate_cmd(client: Client, message: Message):
    """Translate text on request and edit ``message`` with the result.

    The message text is split on single spaces and handled as follows:

    * one word only: translate the text of the replied-to message into
      the default destination language;
    * second word contains ':': treat it as ``src:dest`` language codes;
    * otherwise: treat the second word as the destination language, and
      if it is not a known language, translate the whole message text.

    In the last two forms the text to translate is everything after the
    second word, or the replied-to message's text when nothing follows.
    """
    def translate_text(text_to_translate: str,
                       dest_lang: str = environ.get('LANGUAGE'),
                       src_lang: str = 'DETECT'):
        """Return the translated text, or '' after 20 failed attempts."""
        translator = Translator()
        for _ in range(20):
            try:
                if src_lang == 'DETECT':
                    outcome = translator.translate(text_to_translate,
                                                   dest=dest_lang)
                else:
                    outcome = translator.translate(text_to_translate,
                                                   src=src_lang,
                                                   dest=dest_lang)
                return outcome.text
            except Exception:
                # Retry with a fresh translator instance.
                translator = Translator()
        return ''

    words: List[str] = message.text.split(' ')

    def payload_text():
        """Return the words after the language spec, else the reply text."""
        if len(words) > 2:
            return ' '.join(words[2:])
        return message.reply_to_message.text

    if len(words) == 1:
        translated = translate_text(message.reply_to_message.text)
        if translated != '':
            message.edit_text(translated)
        else:
            message.edit_text("1Couldn't translate...")
        return

    if ':' in words[1]:
        try:
            codes: List[str] = words[1].split(':')
            source_name: str = languages.lookup(codes[0]).name
            target_name: str = languages.lookup(codes[1]).name
            translated = translate_text(payload_text(), target_name,
                                        source_name)
            if translated != '':
                message.edit_text(translated)
            else:
                message.edit_text("2Couldn't translate...")
        except LookupError:
            message.edit_text("Couldn't find language...")
        except Exception:
            message.edit_text("3Couldn't translate...")
        return

    try:
        # Arguments are evaluated left to right: the payload text is
        # resolved before the language lookup.
        translated = translate_text(payload_text(),
                                    languages.lookup(words[1]).name)
        if translated != '':
            message.edit_text(translated)
        else:
            message.edit_text("4Couldn't translate...")
    except LookupError:
        # The second word is not a language: translate the whole message.
        translated = translate_text(message.text)
        if translated != '':
            message.edit_text(translated)
        else:
            message.edit_text("5Couldn't translate...")
Ejemplo n.º 5
0
def interactive_shell(args_file):
    """Run an interactive prompt that classifies typed sentences.

    Loads the saved arguments with ``SaveloadHP.load``, builds a
    ``Classifier`` from them and restores its weights from
    ``args.model_name``. It then reads sentences until 'EXIT' is entered.
    For each sentence, five predicted language names and their
    probabilities are printed.

    Args:
        args_file: Path handed to ``SaveloadHP.load``.
    """
    args = SaveloadHP.load(args_file)
    # Invert label -> index into index -> label. .items() works on both
    # Python 2 and 3, unlike .iteritems().
    i2l = {index: label for label, index in args.vocab.l2i.items()}

    print("Load Model from file: %s" % (args.model_name))
    classifier = Classifier(args)
    # The checkpoint may have been saved on a GPU and loaded on a CPU, or
    # vice versa. map_location is an argument of torch.load, not of
    # load_state_dict (which would reject it with a TypeError).
    if not use_cuda:
        state_dict = torch.load(args.model_name,
                                map_location=lambda storage, loc: storage)
    else:
        state_dict = torch.load(args.model_name)
    classifier.model.load_state_dict(state_dict)

    print("""
To exit, enter 'EXIT'.
Enter a sentence like 
input> wth is it????""")

    # Pick the line reader once: raw_input on Python 2, input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        words_raw = read_line("input> ").strip()

        if words_raw == "EXIT":
            break

        words_raw = Encoder.str2uni(words_raw)
        label_prob, label_pred = classifier.predict(words_raw, 5)
        for i in range(5):
            print(languages.lookup(i2l[label_pred[0][i]]).name)
            print(label_prob[0][i])
Ejemplo n.º 6
0
def predict(sent, args_file):
    """Classify ``sent`` and print five predicted languages.

    Loads the saved arguments with ``SaveloadHP.load``, builds a
    ``Classifier`` from them and restores its weights from
    ``args.model_name``. It then prints five language names, each
    followed by its probability.

    Args:
        sent: Input passed unchanged to ``classifier.predict``.
        args_file: Path handed to ``SaveloadHP.load``.

    Returns:
        The ``(label_prob, label_pred)`` pair returned by
        ``classifier.predict(sent, 5)``.
    """
    args = SaveloadHP.load(args_file)
    # Invert label -> index into index -> label. .items() works on both
    # Python 2 and 3, unlike .iteritems().
    i2l = {index: label for label, index in args.vocab.l2i.items()}

    print("Load Model from file: %s" % (args.model_name))
    classifier = Classifier(args)
    # The checkpoint may have been saved on a GPU and loaded on a CPU, or
    # vice versa. map_location is an argument of torch.load, not of
    # load_state_dict (which would reject it with a TypeError).
    if not use_cuda:
        state_dict = torch.load(args.model_name,
                                map_location=lambda storage, loc: storage)
    else:
        state_dict = torch.load(args.model_name)
    classifier.model.load_state_dict(state_dict)

    label_prob, label_pred = classifier.predict(sent, 5)
    for i in range(5):
        print(languages.lookup(i2l[label_pred[0][i]]).name)
        print(label_prob[0][i])
    return label_prob, label_pred
Ejemplo n.º 7
0
def get_lang_name(iso_639_code):
    """Return the lower-cased name that ``languages.lookup`` gives for the code."""
    language = languages.lookup(iso_639_code)
    return language.name.lower()
Ejemplo n.º 8
0
def is_valid_iso_languages(lan: str) -> bool:
    """Return True if ``languages.lookup`` resolves ``lan``, False on LookupError."""
    try:
        languages.lookup(lan)
    except LookupError:
        return False
    else:
        return True