def load(self, device: str):
    """
    Load user-selected task-specific model

    Args:
        device (str): device information

    Returns:
        object: User-selected task-specific model

    """
    from word2word import Word2word

    model = Word2word(self.config.lang, self._tgt)
    return PororoWord2Word(model, self.config)
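
# A minimal sketch of the equivalent direct call, assuming config.lang == "en"
# and _tgt == "fr"; the PororoWord2Word wrapper itself is defined elsewhere,
# so only the underlying word2word lexicon is exercised here.
from word2word import Word2word

en2fr = Word2word("en", "fr")
print(en2fr("apple", n_best=3))  # e.g. ['pomme', 'pommes', 'pommier']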
import os

import pandas as pd
from word2word import Word2word

# `convert_iso_code` and `remove_punctuation_and_lower_case` are helper
# functions defined elsewhere in this project.


def main(folder_path, lang):
    os.chdir(folder_path)
    files = os.listdir(folder_path)
    l = convert_iso_code(lang)
    for file in files:
        if file.endswith(".csv") and lang in file:
            print(file)
            df = pd.read_csv(file, dtype=str, sep='\t')
            # write one combined file plus one file per language side
            with open(lang, 'a') as f, \
                    open(lang + '.en', 'a') as e, \
                    open(lang + '.' + l, 'a') as t:
                for i, row in df.iterrows():
                    if isinstance(row['source_text'], str) \
                            and isinstance(row['target_text'], str) \
                            and row['item_type'] != 'RESPONSE':
                        source = remove_punctuation_and_lower_case(row['source_text'])
                        target = remove_punctuation_and_lower_case(row['target_text'])
                        # use the same row index on all three lines so the
                        # files stay aligned
                        f.write(str(i) + '|' + source + '|' + target + "\n")
                        e.write(str(i) + '|' + source + "\n")
                        t.write(str(i) + '|' + target + "\n")
    print(folder_path + "/" + lang + "_dict")
    mcsq_dict = Word2word.make("en", l, folder_path + "/" + lang,
                               savedir=folder_path)
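
# Usage sketch (the folder and language code are hypothetical placeholders):
# main() writes the pair files and then builds the lexicon. Word2word.make
# returns the lexicon object, so mcsq_dict above can be queried directly,
# e.g. mcsq_dict("survey").
if __name__ == "__main__":
    main("/data/mcsq", "fr")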
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from word2word import Word2word

dict_path = '/Volumes/Data/dataset/word2word'

# Pre-download every dictionary listed in supporting_languages.txt.
with open('word2word/supporting_languages.txt') as f:
    lines = f.readlines()

for dict_pair in lines:
    codes = dict_pair.strip('\n').split('-')
    print("getting dictionary %s-%s" % (codes[0], codes[1]))
    w2w = Word2word(codes[0], codes[1], dict_path=dict_path)
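
# The exact contents of supporting_languages.txt are not shown here; the
# strip/split logic above implies one hyphenated code pair per line, e.g.:
sample = "en-fr\n"
assert sample.strip('\n').split('-') == ['en', 'fr']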
import argparse

from word2word import Word2word


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--lang1', type=str, required=True,
                        help="ISO 639-1 code of language. "
                             "See `http://opus.nlpl.eu/OpenSubtitles2018.php`")
    parser.add_argument('--lang2', type=str, required=True,
                        help="ISO 639-1 code of language. "
                             "See `http://opus.nlpl.eu/OpenSubtitles2018.php`")
    parser.add_argument('--datapref', type=str, default=None,
                        help="data prefix to a custom parallel corpus. "
                             "Builds a bilingual lexicon from OpenSubtitles2018 "
                             "unless this option is provided.")
    parser.add_argument('--n_lines', type=int, default=100000000,
                        help="number of parallel sentences used")
    parser.add_argument('--cutoff', type=int, default=5000,
                        help="number of words used in calculating collocates "
                             "within each language")
    parser.add_argument('--rerank_width', default=100, type=int,
                        help="maximum number of target-side collocates "
                             "considered for reranking")
    parser.add_argument('--rerank_impl', default="multiprocessing", type=str,
                        help="choice of reranking implementation: "
                             "simple, multiprocessing (default)")
    parser.add_argument('--cased', dest="cased", action="store_true",
                        help="keep the original casing")
    parser.add_argument('--n_translations', type=int, default=10,
                        help="number of final word2word translations kept")
    parser.add_argument('--save_cooccurrence', dest="save_cooccurrence",
                        action="store_true",
                        help="save the co-occurrence results")
    parser.add_argument('--save_pmi', dest="save_pmi", action="store_true",
                        help="save the PMI results")
    parser.add_argument('--savedir', type=str, default=None,
                        help="location to store bilingual lexicons. "
                             "Make sure to use this input when loading from "
                             "a custom-built lexicon.")
    parser.add_argument('--num_workers', default=16, type=int,
                        help="number of workers used for multiprocessing")
    args = parser.parse_args()

    Word2word.make(**vars(args))
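
# Because the script forwards vars(args) straight into Word2word.make, the
# equivalent programmatic call looks like this (keyword values mirror the
# argparse defaults above; "en"/"fr" are placeholder language codes):
from word2word import Word2word

Word2word.make(
    lang1="en", lang2="fr",
    datapref=None, n_lines=100000000, cutoff=5000,
    rerank_width=100, rerank_impl="multiprocessing",
    cased=False, n_translations=10,
    save_cooccurrence=False, save_pmi=False,
    savedir=None, num_workers=16,
)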
from word2word import Word2word

en2fr = Word2word("en", "fr")  # French
en2es = Word2word("en", "es")  # Spanish
en2it = Word2word("en", "it")  # Italian
en2de = Word2word("en", "de")  # German
en2ta = Word2word("en", "ta")  # Tamil
en2te = Word2word("en", "te")  # Telugu


def translate(english_word, caseTranslate):
    # Translate a phrase word by word using the lexicon picked by caseTranslate.
    print("This is where Translate.py takes over")
    words = english_word.split(" ")
    print(words)
    try:
        if caseTranslate == 1:
            for word in words:
                trans = en2fr(word)[0]
                english_word = english_word.replace(word, trans)
            return english_word
        elif caseTranslate == 2:
            for word in words:
                trans = en2es(word)[0]
                english_word = english_word.replace(word, trans)
            return english_word
        elif caseTranslate == 3:
            for word in words:
                trans = en2it(word)[0]
                english_word = english_word.replace(word, trans)
            return english_word
        # cases for en2de, en2ta, and en2te presumably follow the same pattern
    except KeyError:
        # word2word raises KeyError for out-of-vocabulary words;
        # fall back to the (possibly partially translated) input
        return english_word
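
# Usage sketch: caseTranslate=1 selects French. Each word is replaced
# independently by its top-1 translation, so the result is word-by-word
# and approximate; the exact output depends on the downloaded lexicons.
print(translate("good morning", 1))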
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from word2word import Word2word

dict_path = '/Volumes/Data/dataset/word2word'

en2fr = Word2word("en", "fr", dict_path=dict_path)
# out: ['pomme', 'pommes', 'pommier', 'tartes', 'fleurs']
print(','.join(en2fr("apple")))

fr2en = Word2word("fr", "en", dict_path=dict_path)
print(','.join(fr2en("pomme", n_best=2)))

# out: ['travaillé', 'travaillait']
print(','.join(en2fr("worked", n_best=2)))

en2zh = Word2word("en", "zh_cn", dict_path=dict_path)
# out: ['老师', '教师', '学生', '导师', '墨盒']
print(','.join(en2zh("teacher")))

zh2en = Word2word("zh_cn", "en", dict_path=dict_path)
print(','.join(zh2en("老师")))

hi2en = Word2word("hi", "en", dict_path=dict_path)
print(','.join(hi2en("मिलने")))