# - stopwords : words to ignore from the documents

# Collect the entries listed in '.ignore' (one per line, surrounding
# whitespace removed) when that file exists.
_ignore = []
if os.path.exists('.ignore'):
    verbose('Loading files to ignore from: .ignore')
    with open('.ignore') as ignore_file:
        _ignore.extend(entry.strip() for entry in ignore_file)

# Read the stopword list; it stays empty when the file is missing.
stopwords = []
if not os.path.exists(opts.stopwords):
    info('Stopwords file not found assuming, emtpy', opts.stopwords)
else:
    verbose('Loading stopwords: ', opts.stopwords)
    stopwords = docread.readstopwords(opts.stopwords)

# Loading main files -------------------------------------------------
# Load the problem (or problems), skipping the ignored entries.
verbose('Loading files')
problem_dirs = docread.dirproblems(
    dirname, known_pattern, unknown_pattern, _ignore,
    code=codes[opts.language][opts.genre])
problems = docread.problems(problem_dirs)

# The answers file is only selected in DEVELOPMENT or TRAINING mode.
if opts.mode.startswith(("train", "devel")):
    if opts.Answers:
        answers_file = opts.Answers
# Input-loading section of the script, in order:
#   1. `_ignore`   - stripped lines of the '.ignore' file, if that file exists
#                    (otherwise an empty list).
#   2. `stopwords` - `opts.stopwords` is used as a format template and filled
#                    with codes[opts.language]['stopwords'] to obtain the path
#                    `fstopwords`; if that path exists it is read with
#                    docread.readstopwords, otherwise `stopwords` stays [].
#   3. `problems`  - docread.problems(docread.dirproblems(...)) built from
#                    dirname, the known/unknown patterns, `_ignore` and the
#                    code selected by opts.language / opts.genre.
# NOTE(review): the "not found" info message logs `opts.stopwords` (the
#   unformatted template) rather than `fstopwords`, the path that was actually
#   tested - presumably it should report `fstopwords`; confirm.
# NOTE(review): the statements below are collapsed onto a single physical line
#   that starts with '#', so Python parses the whole line as a comment; the
#   original line breaks must be restored for this code to run.
# NOTE(review): the trailing `if opts.concatenate:` has no visible body - the
#   statement continues past this chunk.
# - stopwords : words to ignore from the documents # Loading ignore if exists _ignore = [] if os.path.exists('.ignore'): verbose('Loading files to ignore from: .ignore') with open('.ignore') as file: for line in file: _ignore.append(line.strip()) # Loading stopwords if exits stopwords = [] fstopwords = opts.stopwords.format(codes[opts.language]['stopwords']) if os.path.exists(fstopwords): verbose('Loading stopwords: ', fstopwords) stopwords = docread.readstopwords(fstopwords) else: info('Stopwords file not found assuming, emtpy', opts.stopwords) # Loading main files ------------------------------------------------- # load problems or problem verbose('Loading files') problems = docread.problems( docread.dirproblems(dirname, known_pattern, unknown_pattern, _ignore, code=codes[opts.language][opts.genre])) verbose('Finish loading files') if opts.concatenate: