Python Corpus.dump 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Corpus

클래스/타입: Corpus

메소드/함수: dump

hotexamples.com에서의 예제들: 2

Python Corpus.dump - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, 평가가 가장 높은 Python Corpus.Corpus.dump 실제 사용 예제입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Corpus(30)

find(5)

get_postag_set(4)

read(3)

__init__(2)

verificarPlagio(2)

add_source_document(2)

add_target_document(2)

get_file_name(2)

buildCorpus(2)

emails_as_string(2)

dump(2)

preprocess(2)

get_data(2)

read_ner(2)

outputWords(1)

pickledumpwords(1)

output_rules(1)

ner(1)

outputPOStags(1)

nettoyer_texte(1)

most_frequent_word_by_year(1)

most_frequent_word_by_month(1)

most_frequent_word_by_day(1)

most_frequent_word(1)

most_frequent_trigrams(1)

most_frequent_content_words(1)

picklegetwords(1)

read_label(1)

prepapre_to_matrix(1)

search_ambiguous(1)

vectoriserDocCorpus(1)

url_to_dir(1)

train_word2vec(1)

tag_words_with_most_likely_parses(1)

spanishTags(1)

set_lista_texto(1)

save_json(1)

process(1)

save(1)

results(1)

resetSentStats(1)

read_word2vec(1)

read_prediction(1)

load_json(1)

read_data(1)

most_frequent_bigrams(1)

get_instances(1)

lemmatiserCorpus(1)

calculSimilarite(1)

예제 #1

파일 보기

def downloadCorpus(snapshotDir, corpusDir, projectName, configInfo):
    """Dump the corpus for a single project.

    Joins ``projectName`` onto ``snapshotDir`` and ``corpusDir`` to get the
    per-project snapshot and corpus paths, builds a ``Corpus`` for them with
    the language fixed to ``'java'``, and calls its ``dump()`` method.

    Args:
        snapshotDir: Directory under which the project's snapshot
            directory is looked up.
        corpusDir: Directory under which the project's corpus directory
            is placed.
        projectName: Name of the project; used as the final path component
            of both per-project directories and in the banner message.
        configInfo: Passed through unchanged as the last argument of
            ``Corpus`` (presumably a configuration object -- confirm
            against the ``Corpus`` constructor).
    """
    # 2. Dump the snapshots for a project
    banner = '---------------------------------------------------- \n'
    msg = banner + (' Dump the corpus for project %s \n' % projectName) + banner
    print(msg)

    project_snapshot_dir = os.path.join(snapshotDir, projectName)
    project_corpus_dir = os.path.join(corpusDir, projectName)

    if os.path.isdir(project_corpus_dir):
        # Single-argument call form prints identically on Python 2 and 3,
        # and matches the print(msg) call above.
        print("!! %s already exists...returning \n" % project_corpus_dir)
        # NOTE(review): despite the wording of the message, the early return
        # is disabled, so execution falls through and the corpus is dumped
        # even when the directory already exists. Confirm which is intended.

    corpus = Corpus(project_snapshot_dir, 'java', project_corpus_dir,
                    configInfo)
    corpus.dump()

예제 #2

파일 보기

파일: run.py 프로젝트: saheel1115/szz

    if not os.path.isdir(args.proj_dir):
        print "!! Please provide a valid directory, given: %s" % (
            args.proj_dir)
        sys.exit()

    print "Going to process project %s for %s" % (args.proj_dir, args.lang)

    print "Creating output directory at %s" % (args.out_dir)

    Util.cleanup(args.log_file)

    #od = OutDir(args.out_dir)
    #od.create_out_dir(args.out_dir)

    Log.setLogger(args.verbose, args.log_file)

    cfg = Config(args.config_file)

    corpus = Corpus(args.proj_dir, args.lang, args.out_dir, cfg)
    logging.debug(corpus)
    corpus.dump()

    #print corpus.printSnapshots()
    #1. Fetch one line changes of a given project for a given language from the database
    #edits = fetchEdits(cfg, args.proj_dir, args.lang)

    #2. build the test corpus based on each edit
    #createTrainingCorpus(args.proj_dir, args.lang, args.out_dir, edits)
    #createTestCorpus(args.proj_dir, args.lang, od, edits)