def validate_datatypes(bag):
    """Validate datatype fields found in a bag's bag-info.txt.

    Assumes the bag and its bag-info are otherwise valid. Returns True when
    every date field parses as ISO 8601 and every Language field resolves to
    a known language code; prints the first offending value and returns
    False otherwise.

    Args:
        bag: bag object (or path-like) whose str() is the bag directory.

    Returns:
        bool: True if all datatypes pass, False on the first failure.
    """
    dates = []
    langs = []
    bag_dates_to_validate = ['Date_Start', 'Date_End', 'Bagging_Date']
    bag_info_data = get_fields_from_file(join(str(bag), 'bag-info.txt'))
    # items() works on both py2 and py3; iteritems() is py2-only.
    for k, v in bag_info_data.items():
        if k in bag_dates_to_validate:
            dates.append(v)
        if k == 'Language':
            langs.append(v)
    # Iterating an empty list is a no-op, so no emptiness guards are needed.
    for date in dates:
        try:
            iso8601.parse_date(date)
        except iso8601.ParseError:
            # Narrow catch: only a parse failure means an invalid date;
            # the old bare `except:` also hid programming errors.
            print("invalid date: '{}'".format(date))
            return False
    for language in langs:
        try:
            languages.lookup(language)
        except LookupError:
            # pycountry's lookup raises LookupError for unknown codes.
            print("invalid language code: '{}'".format(language))
            return False
    return True
def get(cls, language):
    """Resolve *language* (a name or ISO 639 code) to a Language record.

    Two lookup strategies, selected by the module-level PYCOUNTRY flag:
    with pycountry, a single fuzzy `languages.lookup` handles names and
    alpha-2/alpha-3 codes; without it, the legacy API is probed by code
    length (alpha2 for 2-char input; part2b/part2t/part3 in turn for
    3-char input).

    Raises:
        LookupError: if *language* cannot be resolved by any strategy.
    """
    try:
        if PYCOUNTRY:
            # Modern pycountry: one lookup accepts names and codes alike.
            c = languages.lookup(language)
            # Older records may lack a bibliographic code, hence getattr.
            return Language(c.alpha_2, c.alpha_3, c.name, getattr(c, "bibliographic", None))
        else:
            l = None
            if len(language) == 2:
                l = languages.get(alpha2=language)
            elif len(language) == 3:
                # Try each 3-letter code family until one matches.
                for code_type in ['part2b', 'part2t', 'part3']:
                    try:
                        l = languages.get(**{code_type: language})
                        break
                    except KeyError:
                        pass
                if not l:
                    raise KeyError(language)
            else:
                # Neither 2 nor 3 characters: cannot be an ISO 639 code.
                raise KeyError(language)
            return Language(l.alpha2, l.part3, l.name, l.part2b or l.part2t)
    except (LookupError, KeyError):
        # Normalize every failure mode to a single LookupError for callers.
        raise LookupError("Invalid language code: {0}".format(language))
def __guess_language(self, tokens):
    """Guess the language of *tokens* and return its lower-cased full name.

    Uses langdetect for detection and pycountry to map the short code to a
    full language name. Falls back to 'english' when langdetect is not
    installed or detection fails, and returns 'none' when no usable words
    remain after filtering.
    """
    try:
        from langdetect import detect
        # BUG FIX: the old except clause referenced
        # langdetect.lang_detect_exception.LangDetectException, but
        # `from langdetect import detect` does not bind the name
        # `langdetect`, so a detection failure raised NameError instead
        # of being handled. Import the exception class explicitly.
        from langdetect.lang_detect_exception import LangDetectException
        from pycountry import languages
    except ImportError:
        print(
            "The langdetect module is required for automated language detection; install with pip install langdetect"
        )
        print("Reverting to english")
        return 'english'
    # Keep word-like tokens (optionally hash-prefixed, letters only, len>=2)
    # that are neither reserved words nor URLs.
    words = [
        w for w in tokens
        if regex.match(r'#?[^\W\d]{2,}$', w)
        and not self.RESERVED_WORDS_PATTERN.match(w)
        and not self.URL_PATTERN.match(w)
    ]
    if words:
        try:
            language_short = detect(' '.join(words))
            # langdetect may return region-tagged codes like 'zh-cn';
            # keep only the base language code for the pycountry lookup.
            return languages.lookup(
                language_short.split('-')[0]).name.lower()
        except LangDetectException:
            print('Language detection failed on string: "' +
                  ' '.join(words) + '", defaulting to English')
            return 'english'
    else:
        return 'none'
def translate_cmd(client: Client, message: Message):
    """Handle a translate command message.

    Supported forms (words split from message.text):
      - `cmd`                 -> translate the replied-to message to the
                                 LANGUAGE env default
      - `cmd src:dest [text]` -> translate text (or the replied-to message)
                                 from src to dest
      - `cmd dest [text]`     -> translate text (or the replied-to message)
                                 to dest
    Edits the original message in place with the translation or an error
    string. The numeric prefixes in the error strings distinguish which
    branch failed.
    """

    def translate_text(text_to_translate: str,
                       dest_lang: str = environ.get('LANGUAGE'),
                       src_lang: str = 'DETECT') -> str:
        # Retries up to 20 times with a fresh Translator on each failure;
        # returns '' if every attempt fails.
        translator = Translator()
        result: str = ''
        for i in range(20):
            try:
                if src_lang != 'DETECT':
                    result = translator.translate(text_to_translate,
                                                  src=src_lang,
                                                  dest=dest_lang).text
                else:
                    # No explicit source: let the service auto-detect.
                    result = translator.translate(text_to_translate,
                                                  dest=dest_lang).text
                break
            except Exception:
                # NOTE(review): broad catch is deliberate best-effort retry;
                # a fresh Translator is created in case the session broke.
                translator = Translator()
        return result

    words: List[str] = message.text.split(' ')
    if len(words) == 1:
        # Bare command: translate the replied-to message to the default lang.
        text = translate_text(message.reply_to_message.text)
        message.edit_text(text if text != '' else "1Couldn't translate...")
        return
    if ':' in words[1]:
        # Explicit `src:dest` language pair.
        try:
            langs: List[str] = words[1].split(':')
            src: str = languages.lookup(langs[0]).name
            dest: str = languages.lookup(langs[1]).name
            text = ' '.join(
                words[2:]) if len(words) > 2 else message.reply_to_message.text
            text = translate_text(text, dest, src)
            message.edit_text(text if text != '' else "2Couldn't translate...")
        except LookupError:
            # One of the language codes was not recognized by pycountry.
            message.edit_text("Couldn't find language...")
        except Exception:
            message.edit_text("3Couldn't translate...")
        return
    # Single destination language given as words[1].
    try:
        text = ' '.join(
            words[2:]) if len(words) > 2 else message.reply_to_message.text
        text = translate_text(text, languages.lookup(words[1]).name)
        message.edit_text(text if text != '' else "4Couldn't translate...")
    except LookupError:
        # words[1] was not a language code: translate the whole message text.
        text = translate_text(message.text)
        message.edit_text(text if text != '' else "5Couldn't translate...")
def interactive_shell(args_file):
    """Creates interactive shell to play with model

    Args:
        args_file: path to the saved hyper-parameter/vocab bundle
            loadable via SaveloadHP.load.
    """
    args = SaveloadHP.load(args_file)
    # Invert the label->index vocab so predictions can be shown as labels.
    # (dict comprehension + items(): iteritems() was py2-only and broke the
    # py3 path this function explicitly supports below.)
    i2l = {v: k for k, v in args.vocab.l2i.items()}
    print("Load Model from file: %s" % (args.model_name))
    classifier = Classifier(args)
    # When the model was trained on GPU but loaded on CPU (or vice versa),
    # map_location must be passed to torch.load itself.
    # BUG FIX: it was previously passed to load_state_dict, which does not
    # accept that keyword and raised TypeError.
    if not use_cuda:
        classifier.model.load_state_dict(
            torch.load(args.model_name,
                       map_location=lambda storage, loc: storage))
    else:
        classifier.model.load_state_dict(torch.load(args.model_name))
    print("""
To exit, enter 'EXIT'.
Enter a sentence like
input> wth is it????""")
    while True:
        try:
            # for python 2
            sentence = raw_input("input> ")
        except NameError:
            # for python 3
            sentence = input("input> ")
        words_raw = sentence.strip()
        if words_raw == "EXIT":
            break
        words_raw = Encoder.str2uni(words_raw)
        label_prob, label_pred = classifier.predict(words_raw, 5)
        # range() works on py2 and py3; xrange was py2-only and raised
        # NameError on the py3 path above.
        for i in range(5):
            print(languages.lookup(i2l[label_pred[0][i]]).name)
            print(label_prob[0][i])
def predict(sent, args_file):
    """Predict and print the top-5 labels for *sent* with a saved model.

    Args:
        sent: input sentence (as expected by Classifier.predict).
        args_file: path to the saved hyper-parameter/vocab bundle
            loadable via SaveloadHP.load.

    Returns:
        (label_prob, label_pred) as returned by Classifier.predict.
    """
    args = SaveloadHP.load(args_file)
    # Invert the label->index vocab; items() replaces the py2-only iteritems().
    i2l = {v: k for k, v in args.vocab.l2i.items()}
    print("Load Model from file: %s" % (args.model_name))
    classifier = Classifier(args)
    # When the model was trained on GPU but loaded on CPU (or vice versa),
    # map_location must be passed to torch.load itself.
    # BUG FIX: it was previously passed to load_state_dict, which does not
    # accept that keyword and raised TypeError.
    if not use_cuda:
        classifier.model.load_state_dict(
            torch.load(args.model_name,
                       map_location=lambda storage, loc: storage))
    else:
        classifier.model.load_state_dict(torch.load(args.model_name))
    label_prob, label_pred = classifier.predict(sent, 5)
    # range() replaces the py2-only xrange.
    for i in range(5):
        print(languages.lookup(i2l[label_pred[0][i]]).name)
        print(label_prob[0][i])
    return label_prob, label_pred
def get_lang_name(iso_639_code):
    """Return the lower-cased language name for an ISO 639 code."""
    return languages.lookup(iso_639_code).name.lower()
def is_valid_iso_languages(lan: str) -> bool:
    """Return True if *lan* resolves to a known ISO language, else False.

    Args:
        lan: language name or ISO 639 code to check.

    Returns:
        bool: whether pycountry's lookup recognizes *lan*.
    """
    try:
        # We only care whether lookup succeeds; the previous version bound
        # the result to an unused local (`res`).
        languages.lookup(lan)
    except LookupError:
        return False
    return True