Python Word2word.make Examples

Programming Language: Python

Namespace/Package Name: word2word

Class/Type: Word2word

Method/Function: make

Examples at hotexamples.com: 2

Python Word2word.make - 2 examples found. These are the top rated real world Python examples of word2word.Word2word.make extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Word2word(4)

make(2)

Frequently Used Methods

Word2word (4)

make (2)

Example #1

Show file

def main(folder_path, lang):
    """Export sentence pairs from CSV files and build an en->target lexicon.

    Every file in ``folder_path`` whose name ends with ``.csv`` and contains
    ``lang`` is read as a tab-separated table. For each row where both
    ``source_text`` and ``target_text`` are strings and ``item_type`` is not
    ``'RESPONSE'``, the normalized texts are appended to three files in
    ``folder_path``:

    * ``<lang>``              -- ``label|source|target``
    * ``<lang>.en``           -- ``label|source``
    * ``<lang>.<target code>`` -- ``label|target``

    ``label`` is the row's DataFrame index and is the same in all three
    files. Afterwards ``Word2word.make`` is called with ``folder_path/<lang>``
    as the data prefix and ``folder_path`` as ``savedir``.

    Args:
        folder_path: Directory holding the CSV files; also used as the
            output directory. The process working directory is changed to it.
        lang: Language tag used to select the CSV files and to name the
            output files. It is passed to ``convert_iso_code`` to obtain the
            target-language code (presumably an ISO 639-1 code -- confirm
            against that helper).
    """
    # Resolve to an absolute path first: after chdir() a relative
    # folder_path would otherwise be interpreted relative to itself.
    folder_path = os.path.abspath(folder_path)
    os.chdir(folder_path)

    # Computed before the loop so it is defined for Word2word.make even when
    # no CSV file matches.
    target_code = convert_iso_code(lang)

    for file in os.listdir(folder_path):
        if not (file.endswith(".csv") and lang in file):
            continue
        print(file)
        df = pd.read_csv(file, dtype=str, sep='\t')

        # Append mode: several CSV files for the same language accumulate
        # into the same three output files. Explicit UTF-8 keeps the output
        # independent of the platform's default encoding.
        with open(lang, 'a', encoding='utf-8') as combined, \
                open(lang + '.en', 'a', encoding='utf-8') as source_file, \
                open(lang + '.' + target_code, 'a',
                     encoding='utf-8') as target_file:
            for row_label, row in df.iterrows():
                # dtype=str leaves missing cells as non-string values (NaN);
                # the isinstance checks skip those rows.
                if not isinstance(row['source_text'], str):
                    continue
                if not isinstance(row['target_text'], str):
                    continue
                if row['item_type'] == 'RESPONSE':
                    continue

                source = remove_punctuation_and_lower_case(row['source_text'])
                target = remove_punctuation_and_lower_case(row['target_text'])
                label = str(row_label)

                # The same label is written to all three files so a pair can
                # be matched across them.
                combined.write(label + '|' + source + '|' + target + "\n")
                source_file.write(label + '|' + source + "\n")
                target_file.write(label + '|' + target + "\n")

    print(folder_path + "/" + lang + "_dict")
    Word2word.make("en",
                   target_code,
                   folder_path + "/" + lang,
                   savedir=folder_path)

Example #2

Show file

def main():
    """Parse command-line options and build a bilingual lexicon.

    Every parsed option is forwarded as a keyword argument to
    ``Word2word.make``, so the option names defined here must match that
    method's parameter names.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--lang1',
                        type=str,
                        required=True,
                        help="ISO 639-1 code of language. "
                        "See `http://opus.nlpl.eu/OpenSubtitles2018.php`")
    parser.add_argument('--lang2',
                        type=str,
                        required=True,
                        help="ISO 639-1 code of language. "
                        "See `http://opus.nlpl.eu/OpenSubtitles2018.php`")
    parser.add_argument('--datapref',
                        type=str,
                        default=None,
                        help="data prefix to a custom parallel corpus. "
                        "builds a bilingual lexicon using OpenSubtitles2018 "
                        "unless this option is provided.")
    parser.add_argument('--n_lines',
                        type=int,
                        default=100000000,
                        help="number of parallel sentences used")
    parser.add_argument(
        '--cutoff',
        type=int,
        default=5000,
        help=
        "number of words that are used in calculating collocates within each language"
    )
    parser.add_argument(
        '--rerank_width',
        default=100,
        type=int,
        help="maximum number of target-side collocates considered for reranking"
    )
    parser.add_argument(
        '--rerank_impl',
        default="multiprocessing",
        type=str,
        help=
        "choice of reranking implementation: simple, multiprocessing (default)"
    )
    parser.add_argument('--cased',
                        dest="cased",
                        action="store_true",
                        help="Keep the case.")
    parser.add_argument('--n_translations',
                        type=int,
                        default=10,
                        help="number of final word2word translations kept")
    parser.add_argument('--save_cooccurrence',
                        dest="save_cooccurrence",
                        action="store_true",
                        help="Save the cooccurrence results")
    parser.add_argument('--save_pmi',
                        dest="save_pmi",
                        action="store_true",
                        help="Save the pmi results")
    # Adjacent string literals are concatenated as-is, so each fragment needs
    # its own trailing space to keep the rendered help text readable.
    parser.add_argument('--savedir',
                        type=str,
                        default=None,
                        help="location to store bilingual lexicons. "
                        "make sure to use this input when loading from "
                        "a custom-built lexicon.")
    parser.add_argument('--num_workers',
                        default=16,
                        type=int,
                        help="number of workers used for multiprocessing")
    args = parser.parse_args()

    # vars(args) turns the Namespace into a dict keyed by option name.
    Word2word.make(**vars(args))