#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import udpipe_tokenizer
from functions import syntax_length_cf

import argparse

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--path',
                        nargs='*',
                        help='path to reference collection')
    parser.add_argument('-j',
                        '--jobs',
                        nargs='*',
                        help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-u',
                        '--udpipe',
                        nargs='*',
                        help='path to udpipe model')
    args = parser.parse_args()

    syntax_length_tokenizer = udpipe_tokenizer.UdPipeTokenizer(args.udpipe[0])
    syntax_length_complexity_function = syntax_length_cf.SyntaxLengthComplexityFunction()
    model = complexity_model.ComplexityModel(syntax_length_tokenizer,
                                             syntax_length_complexity_function,
                                             alphabet='reduced')
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))  # argparse yields strings; n_jobs needs an int

    model.dump(path='.', model_name=args.name[0])
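
A hypothetical invocation, assuming this script is saved as train_syntax_length.py in a package named scripts (both names are illustrative; the relative pathmagic import means it should be run as a module, and the flags match the argparse definitions above):

    python -m scripts.train_syntax_length -p ./reference_collection -j 4 -n syntax_length_model -u ./russian-syntagrus.udpipe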
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import word_tokenizer
from functions import counter_cf

import argparse

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-t', '--tf', nargs='*', help='path to tf pickle dump')
    parser.add_argument('-s', '--stopwords', nargs='*', help='path to stopwords.txt')
    args = parser.parse_args()

    word_tokenizer = word_tokenizer.WordTokenizer(stopwords=args.stopwords[0])
    lexical_complexity_function = counter_cf.LexicalCounterComplexityFunction(args.tf[0])
    model = complexity_model.ComplexityModel(word_tokenizer, lexical_complexity_function, alphabet='reduced')
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))

    model.dump(path='.', model_name=args.name[0])
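
A sketch of running this lexical-counter variant (the script name, tf pickle path, and stopwords path are illustrative; the flags come from the parser above):

    python -m scripts.train_lexical_counter -p ./reference_collection -j 4 -n lexical_counter_model -t ./tf.pkl -s ./stopwords.txt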
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import ru_syllab_tokenizer
from functions import distance_cf

import argparse

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--path',
                        nargs='*',
                        help='path to reference collection')
    parser.add_argument('-j',
                        '--jobs',
                        nargs='*',
                        help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-s',
                        '--stopwords',
                        nargs='*',
                        help='path to stopwords.txt')
    args = parser.parse_args()

    syllab_tokenizer = ru_syllab_tokenizer.RuSyllabTokenizer(
        stopwords=args.stopwords[0])
    syllab_complexity_function = distance_cf.DistanceComplexityFunction()
    model = complexity_model.ComplexityModel(syllab_tokenizer,
                                             syllab_complexity_function)
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))

    model.dump(path='.', model_name=args.name[0])
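
A hypothetical run of this syllable-based variant (script and file names assumed):

    python -m scripts.train_syllab -p ./reference_collection -j 4 -n syllab_model -s ./stopwords.txt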
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import udpipe_tokenizer_pos
from functions import distance_cf

import argparse

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-u', '--udpipe', nargs='*', help='path to udpipe model')
    args = parser.parse_args()

    syntax_pos_tokenizer = udpipe_tokenizer_pos.UdPipePOSTokenizer(args.udpipe[0])
    syntax_pos_complexity_function = distance_cf.DistanceComplexityFunction()
    model = complexity_model.ComplexityModel(syntax_pos_tokenizer, syntax_pos_complexity_function)
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))

    model.dump(path='.', model_name=args.name[0])
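
A possible invocation of this POS-tag variant (script name and UDPipe model path illustrative):

    python -m scripts.train_syntax_pos -p ./reference_collection -j 4 -n syntax_pos_model -u ./russian-syntagrus.udpipe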
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import word_tokenizer
from functions import distance_cf

import argparse

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--path',
                        nargs='*',
                        help='path to reference collection')
    parser.add_argument('-j',
                        '--jobs',
                        nargs='*',
                        help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-s',
                        '--stopwords',
                        nargs='*',
                        help='path to stopwords.txt')
    args = parser.parse_args()

    word_tokenizer = word_tokenizer.WordTokenizer(stopwords=args.stopwords[0])
    lexical_distance_complexity_function = distance_cf.DistanceComplexityFunction()
    model = complexity_model.ComplexityModel(
        word_tokenizer, lexical_distance_complexity_function)
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))

    model.dump(path='.', model_name=args.name[0])
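
Finally, a sketch of running the lexical-distance variant (names illustrative):

    python -m scripts.train_lexical_distance -p ./reference_collection -j 4 -n lexical_distance_model -s ./stopwords.txt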