#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import udpipe_tokenizer
from functions import syntax_length_cf

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-u', '--udpipe', nargs='*', help='path to udpipe model')

    args = parser.parse_args()

    # UDPipe-based tokenizer paired with the syntax-length complexity function
    syntax_length_tokenizer = udpipe_tokenizer.UdPipeTokenizer(args.udpipe[0])
    syntax_length_complexity_function = syntax_length_cf.SyntaxLengthComplexityFunction()

    model = complexity_model.ComplexityModel(syntax_length_tokenizer,
                                             syntax_length_complexity_function,
                                             alphabet='reduced')

    # argparse yields strings, so the job count is cast to int before fitting
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))
    model.dump(path='.', model_name=args.name[0])
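# A possible invocation (the module path and file names below are placeholders,
# not from the repo; the relative pathmagic import means the script has to be
# run with `python -m` from the project root):
#
#   python -m fit.fit_syntax_length_model -p refs/ -j 4 \
#       -n syntax_length -u russian-syntagrus.udpipe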
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import word_tokenizer
from functions import counter_cf

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-t', '--tf', nargs='*', help='path to tf pickle dump')
    parser.add_argument('-s', '--stopwords', nargs='*', help='path to stopwords.txt')

    args = parser.parse_args()

    # word-level tokenizer paired with the term-frequency counter function;
    # distinct name so the word_tokenizer module is not shadowed
    lexical_tokenizer = word_tokenizer.WordTokenizer(stopwords=args.stopwords[0])
    lexical_complexity_function = counter_cf.LexicalCounterComplexityFunction(args.tf[0])

    model = complexity_model.ComplexityModel(lexical_tokenizer,
                                             lexical_complexity_function,
                                             alphabet='reduced')

    # argparse yields strings, so the job count is cast to int before fitting
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))
    model.dump(path='.', model_name=args.name[0])
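# A possible invocation (placeholder paths; per the help strings, -t points at
# a precomputed term-frequency pickle and -s at a plain-text stopword list):
#
#   python -m fit.fit_lexical_counter_model -p refs/ -j 4 \
#       -n lexical_counter -t tf.pkl -s stopwords.txt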
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import ru_syllab_tokenizer
from functions import distance_cf

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-s', '--stopwords', nargs='*', help='path to stopwords.txt')

    args = parser.parse_args()

    # Russian syllable tokenizer paired with the distance-based complexity function
    syllab_tokenizer = ru_syllab_tokenizer.RuSyllabTokenizer(stopwords=args.stopwords[0])
    syllab_complexity_function = distance_cf.DistanceComplexityFunction()

    model = complexity_model.ComplexityModel(syllab_tokenizer,
                                             syllab_complexity_function)

    # argparse yields strings, so the job count is cast to int before fitting
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))
    model.dump(path='.', model_name=args.name[0])
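# A possible invocation (placeholder names; only a stopword list is needed
# here, since syllable tokenization requires no external model file):
#
#   python -m fit.fit_syllab_model -p refs/ -j 4 -n syllab -s stopwords.txt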
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import udpipe_tokenizer_pos
from functions import distance_cf

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-u', '--udpipe', nargs='*', help='path to udpipe model')

    args = parser.parse_args()

    # UDPipe POS tokenizer paired with the distance-based complexity function
    syntax_pos_tokenizer = udpipe_tokenizer_pos.UdPipePOSTokenizer(args.udpipe[0])
    syntax_pos_complexity_function = distance_cf.DistanceComplexityFunction()

    model = complexity_model.ComplexityModel(syntax_pos_tokenizer,
                                             syntax_pos_complexity_function)

    # argparse yields strings, so the job count is cast to int before fitting
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))
    model.dump(path='.', model_name=args.name[0])
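# A possible invocation (placeholder names; -u points at a pretrained UDPipe
# model file, used here for POS tagging):
#
#   python -m fit.fit_syntax_pos_model -p refs/ -j 4 \
#       -n syntax_pos -u russian-syntagrus.udpipe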
#
# Created by maks5507 ([email protected])
#

from . import pathmagic
pathmagic.add_to_path(2)

from complexity import complexity_model
from tokenizers import word_tokenizer
from functions import distance_cf

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--path', nargs='*', help='path to reference collection')
    parser.add_argument('-j', '--jobs', nargs='*', help='number of parallel jobs')
    parser.add_argument('-n', '--name', nargs='*', help='name of the model')
    parser.add_argument('-s', '--stopwords', nargs='*', help='path to stopwords.txt')

    args = parser.parse_args()

    # word-level tokenizer paired with the distance-based complexity function;
    # distinct name so the word_tokenizer module is not shadowed
    lexical_tokenizer = word_tokenizer.WordTokenizer(stopwords=args.stopwords[0])
    lexical_distance_complexity_function = distance_cf.DistanceComplexityFunction()

    model = complexity_model.ComplexityModel(lexical_tokenizer,
                                             lexical_distance_complexity_function)

    # argparse yields strings, so the job count is cast to int before fitting
    model.fit(args.path[0], use_preproc=False, n_jobs=int(args.jobs[0]))
    model.dump(path='.', model_name=args.name[0])
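# A possible invocation (placeholder names; the fitted model is dumped into
# the current directory under the -n name):
#
#   python -m fit.fit_lexical_distance_model -p refs/ -j 4 \
#       -n lexical_distance -s stopwords.txt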