コード例 #1
0
ファイル: p1.py プロジェクト: zsxh/Coursera_NLP_MC
def main():
    """Count PCFG rules, rewrite rare words, recount, and write the model.

    Steps, all driven by hard-coded file names:

    1. Feed every JSON-decoded line of ``parse_train.dat`` to a ``PCFG``
       counter, then call ``count_word()``.
    2. Call ``process_rare_words`` to produce ``p1.train.rare.dat`` from
       the training file, using ``pcfg.rare_words`` and
       ``rare_words_rule_p1``.
    3. Feed every JSON-decoded line of the rewritten file to a fresh
       ``PCFG``, call ``cal_rule_params()``, and write the result to
       ``parser_train.counts.out``.

    Every file is opened in a ``with`` block so that it is closed (and, for
    the rewritten corpus, flushed) deterministically before the next step
    reads it, instead of relying on garbage collection.
    """
    train_data_filename = 'parse_train.dat'
    train_rare_filename = 'p1.train.rare.dat'
    pcfg_model_filename = 'parser_train.counts.out'

    # First pass over the raw training data: one JSON document per line.
    pcfg = PCFG()
    with open(train_data_filename) as train_file:
        for line in train_file:
            pcfg.count(json.loads(line))
    # NOTE(review): presumably this is what populates pcfg.rare_words,
    # which is read just below -- confirm against the PCFG class.
    pcfg.count_word()

    # Nested `with` (rather than the comma form) keeps Python 2.6 support.
    with open(train_data_filename) as train_file:
        with open(train_rare_filename, 'w') as rare_file:
            process_rare_words(train_file, rare_file, pcfg.rare_words,
                               rare_words_rule_p1)

    # Second pass: the rewritten file is fully flushed and closed by now.
    new_pcfg = PCFG()
    with open(train_rare_filename) as rare_file:
        for line in rare_file:
            new_pcfg.count(json.loads(line))
    new_pcfg.cal_rule_params()

    with open(pcfg_model_filename, 'w') as model_file:
        new_pcfg.write(model_file)
コード例 #2
0
ファイル: p2.py プロジェクト: xunyuw/Coursera_NLP_MC
def train(train_data_filename, train_rare_filename, pcfg_model_filename,
          rare_words_rule):
    """Count PCFG rules, rewrite rare words, recount, and write the model.

    Args:
        train_data_filename: Path of the training file; each line is parsed
            with ``json.loads`` and passed to ``PCFG.count``.
        train_rare_filename: Path that ``process_rare_words`` writes to and
            that the second counting pass reads back.
        pcfg_model_filename: Path the final model is written to via
            ``PCFG.write``.
        rare_words_rule: Passed through unchanged as the last argument of
            ``process_rare_words``.

    Returns:
        The second ``PCFG`` instance, after ``cal_rule_params()`` has been
        called on it.

    Every file is opened in a ``with`` block so that it is closed (and, for
    the rewritten corpus, flushed) deterministically before the next step
    reads it, instead of relying on garbage collection.
    """
    # Parenthesized single-argument print emits the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('train PCFG model')
    pcfg = PCFG()
    with open(train_data_filename) as train_file:
        for line in train_file:
            pcfg.count(json.loads(line))
    # NOTE(review): presumably this is what populates pcfg.rare_words,
    # which is read just below -- confirm against the PCFG class.
    pcfg.count_word()

    print('process rare word')
    # Nested `with` (rather than the comma form) keeps Python 2.6 support.
    with open(train_data_filename) as train_file:
        with open(train_rare_filename, 'w') as rare_file:
            process_rare_words(train_file, rare_file, pcfg.rare_words,
                               rare_words_rule)

    print('train PCFG model again')
    # The rewritten file is fully flushed and closed by now.
    new_pcfg = PCFG()
    with open(train_rare_filename) as rare_file:
        for line in rare_file:
            new_pcfg.count(json.loads(line))
    new_pcfg.cal_rule_params()

    with open(pcfg_model_filename, 'w') as model_file:
        new_pcfg.write(model_file)
    return new_pcfg
コード例 #3
0
ファイル: p1.py プロジェクト: M4573R/Coursera_NLP_MC
def main():
    """Run the two-pass PCFG counting pipeline on the fixed p1 file names.

    The first pass counts ``parse_train.dat`` (one JSON document per line)
    and calls ``count_word()``. ``process_rare_words`` then produces
    ``p1.train.rare.dat`` using ``pcfg.rare_words`` and
    ``rare_words_rule_p1``. The second pass counts that rewritten file with
    a fresh ``PCFG``, calls ``cal_rule_params()``, and writes the model to
    ``parser_train.counts.out``.

    File handles are managed with ``with`` so each one is closed as soon as
    its step finishes; in particular the rewritten corpus is flushed to
    disk before it is read back, without depending on garbage collection.
    """
    train_data_filename = 'parse_train.dat'
    train_rare_filename = 'p1.train.rare.dat'
    pcfg_model_filename = 'parser_train.counts.out'

    pcfg = PCFG()
    with open(train_data_filename) as source:
        for raw_line in source:
            tree = json.loads(raw_line)
            pcfg.count(tree)
    # NOTE(review): pcfg.rare_words is read right after this call, so it is
    # presumably computed here -- confirm against the PCFG class.
    pcfg.count_word()

    # Nested `with` blocks instead of the comma form, for Python 2.6.
    with open(train_data_filename) as source:
        with open(train_rare_filename, 'w') as rewritten:
            process_rare_words(source,
                               rewritten,
                               pcfg.rare_words,
                               rare_words_rule_p1)

    new_pcfg = PCFG()
    with open(train_rare_filename) as rewritten:
        for raw_line in rewritten:
            tree = json.loads(raw_line)
            new_pcfg.count(tree)
    new_pcfg.cal_rule_params()

    with open(pcfg_model_filename, 'w') as model_out:
        new_pcfg.write(model_out)
コード例 #4
0
ファイル: p2.py プロジェクト: M4573R/Coursera_NLP_MC
def train(train_data_filename, train_rare_filename, pcfg_model_filename, rare_words_rule):
    """Run the two-pass PCFG counting pipeline and return the final model.

    Args:
        train_data_filename: Input path; every line is decoded with
            ``json.loads`` and handed to ``PCFG.count``.
        train_rare_filename: Intermediate path written by
            ``process_rare_words`` and read back for the second pass.
        pcfg_model_filename: Output path handed (as an open file) to
            ``PCFG.write``.
        rare_words_rule: Forwarded as-is to ``process_rare_words``.

    Returns:
        The ``PCFG`` built in the second pass, after ``cal_rule_params()``.

    File handles are managed with ``with`` so each one is closed as soon as
    its step finishes; in particular the intermediate file is flushed to
    disk before it is read back, without depending on garbage collection.
    """
    # Single-argument print(...) prints identical text on Python 2 and 3.
    print('train PCFG model')
    pcfg = PCFG()
    with open(train_data_filename) as source:
        for raw_line in source:
            tree = json.loads(raw_line)
            pcfg.count(tree)
    # NOTE(review): pcfg.rare_words is read right after this call, so it is
    # presumably computed here -- confirm against the PCFG class.
    pcfg.count_word()

    print('process rare word')
    # Nested `with` blocks instead of the comma form, for Python 2.6.
    with open(train_data_filename) as source:
        with open(train_rare_filename, 'w') as rewritten:
            process_rare_words(source,
                               rewritten,
                               pcfg.rare_words,
                               rare_words_rule)

    print('train PCFG model again')
    new_pcfg = PCFG()
    with open(train_rare_filename) as rewritten:
        for raw_line in rewritten:
            tree = json.loads(raw_line)
            new_pcfg.count(tree)
    new_pcfg.cal_rule_params()

    with open(pcfg_model_filename, 'w') as model_out:
        new_pcfg.write(model_out)
    return new_pcfg