コード例 #1
0
ファイル: authorid.py プロジェクト: pombredanne/authorid
    # - stopwords : words to ignore from the documents

    # Loading the list of files to ignore, if a .ignore file exists
    _ignore=[]
    if os.path.exists('.ignore'):
        verbose('Loading files to ignore from: .ignore')
        with open('.ignore') as file:
            for line in file:
                _ignore.append(line.strip())


    # Loading stopwords if the file exists
    stopwords=[]
    if os.path.exists(opts.stopwords):
        verbose('Loading stopwords: ',opts.stopwords)
        stopwords=docread.readstopwords(opts.stopwords)
    else:
        info('Stopwords file not found assuming, emtpy',opts.stopwords)

    # Loading main files -------------------------------------------------
    # load problems or problem
    verbose('Loading files')
    problems=docread.problems(
        docread.dirproblems(dirname,known_pattern,unknown_pattern,_ignore,
                    code=codes[opts.language][opts.genre]))

   
    # Loading answers file only for DEVELOPMENT OR TRAINING MODE
    if opts.mode.startswith("train") or opts.mode.startswith("devel"):
        if opts.Answers:
            answers_file=opts.Answers
コード例 #2
0
    # - stopwords : words to ignore from the documents

    # Loading the list of files to ignore, if a .ignore file exists
    _ignore = []
    if os.path.exists('.ignore'):
        verbose('Loading files to ignore from: .ignore')
        with open('.ignore') as file:
            for line in file:
                _ignore.append(line.strip())

    # Loading stopwords if the file exists
    stopwords = []
    fstopwords = opts.stopwords.format(codes[opts.language]['stopwords'])
    if os.path.exists(fstopwords):
        verbose('Loading stopwords: ', fstopwords)
        stopwords = docread.readstopwords(fstopwords)
    else:
        info('Stopwords file not found assuming, emtpy', opts.stopwords)

    # Loading main files -------------------------------------------------
    # load problems or problem
    verbose('Loading files')
    problems = docread.problems(
        docread.dirproblems(dirname,
                            known_pattern,
                            unknown_pattern,
                            _ignore,
                            code=codes[opts.language][opts.genre]))
    verbose('Finish loading files')

    if opts.concatenate: