# coding: utf-8
# For each p in 20, 40, 60, 80: train an SDPParser on
# data/semeval15/en.pas.train.<p>.conllu (passing a pretrained embeddings file
# and the train/dev .bert files), reload the model from its save directory and
# evaluate it on the en.id and en.ood PAS test files.
#
# NOTE: there is no `if __name__ == '__main__':` guard, so the whole sweep
# runs whenever this module is executed or imported.
from bertsota.common.data import ParserVocabulary, DataLoader
from bertsota.common.utils import init_logger
from bertsota.parser.dep_parser import SDPParser

for p in range(20, 100, 20):
    # Both the model directory and the training file are keyed by p.
    save_dir = 'data/model/bert-base-pas{}'.format(p)
    train_file = 'data/semeval15/en.pas.train.{}.conllu'.format(p)
    train_dev_bert = [
        'data/embedding/bert_base_sum/en.train.bert',
        'data/embedding/bert_base_sum/en.dev.bert'
    ]

    parser = SDPParser()
    parser.train(
        train_file=train_file,
        dev_file='data/semeval15/en.pas.dev.conllu',
        save_dir=save_dir,
        pretrained_embeddings_file='data/embedding/glove.6B.100d.shrinked.txt',
        bert_path=train_dev_bert)
    parser.load(save_dir)

    # One logger (created from save_dir and 'test.log') is shared by both
    # evaluation runs of this model.
    logger = init_logger(save_dir, 'test.log')
    test_sets = (
        ('data/semeval15/en.id.pas.conllu',
         'data/embedding/bert_base_sum/en.id.bert'),
        ('data/semeval15/en.ood.pas.conllu',
         'data/embedding/bert_base_sum/en.ood.bert'),
    )
    for test_file, bert_file in test_sets:
        parser.evaluate(test_file=test_file,
                        bert_path=bert_file,
                        save_dir=save_dir,
                        logger=logger)
# coding: utf-8
# Train an SDPParser on data/semeval15/en.dm.train.conllu using only the
# train/dev .bert files (no pretrained_embeddings_file is passed), reload the
# saved model and evaluate it on the en.id and en.ood DM test files with
# debug=True.
import pickle

from bertsota.common.data import ParserVocabulary, DataLoader
from bertsota.common.utils import init_logger
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/bert-dm-noword3'
    train_dev_bert = ['data/embedding/bert_base_sum/en.train.bert',
                      'data/embedding/bert_base_sum/en.dev.bert']

    parser = SDPParser()
    parser.train(train_file='data/semeval15/en.dm.train.conllu',
                 dev_file='data/semeval15/en.dm.dev.conllu',
                 save_dir=save_dir,
                 bert_path=train_dev_bert)
    parser.load(save_dir)

    # The same logger is handed to both evaluation calls.
    logger = init_logger(save_dir, 'test.log')
    test_sets = (
        ('data/semeval15/en.id.dm.conllu',
         'data/embedding/bert_base_sum/en.id.bert'),
        ('data/semeval15/en.ood.dm.conllu',
         'data/embedding/bert_base_sum/en.ood.bert'),
    )
    for test_file, bert_file in test_sets:
        parser.evaluate(test_file=test_file,
                        save_dir=save_dir,
                        bert_path=bert_file,
                        logger=logger,
                        debug=True)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Train an SDPParser on data/semeval15/en.psd.conllu with a pretrained
# embeddings file, using en.id.psd.conllu as both the dev and the test file,
# then reload the saved model and evaluate it on that same file.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/train-embedding-psd-id'
    # The identical literal is used for dev_file, test_file and the final
    # evaluation in this script.
    id_file = 'data/semeval15/en.id.psd.conllu'

    parser = SDPParser()
    parser.train(train_file='data/semeval15/en.psd.conllu',
                 dev_file=id_file,
                 test_file=id_file,
                 save_dir=save_dir,
                 pretrained_embeddings_file='data/embedding/glove.6B.100d.txt')
    parser.load(save_dir)
    parser.evaluate(test_file=id_file,
                    save_dir=save_dir,
                    num_buckets_test=10)
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-02-07 20:45
#
# Train an SDPParser on data/SemEval-2016/text.train.conllu using only the
# train/valid .bert files (no pretrained_embeddings_file is passed), reload
# the saved model and evaluate it on text.test.conllu with chinese=True.
#
# NOTE: there is no `if __name__ == '__main__':` guard, so training starts
# whenever this module is executed or imported.
from bertsota.parser.dep_parser import SDPParser

save_dir = 'data/model/text-noword3'
train_valid_bert = [
    'data/embedding/bert_base_sum/text.train.bert',
    'data/embedding/bert_base_sum/text.valid.bert'
]

parser = SDPParser()
parser.train(train_file='data/SemEval-2016/text.train.conllu',
             dev_file='data/SemEval-2016/text.valid.conllu',
             save_dir=save_dir,
             bert_path=train_valid_bert,
             root='root')
parser.load(save_dir)

# The test file and save directory are passed positionally, as in the
# original call.
test_file = 'data/SemEval-2016/text.test.conllu'
parser.evaluate(test_file,
                save_dir,
                bert_path='data/embedding/bert_base_sum/text.test.bert',
                chinese=True)
# coding: utf-8
# Train an SDPParser on data/semeval15/en.dm.train.conllu with a pretrained
# embeddings file plus the train/dev .bert files from bert_large_sum, reload
# the saved model and evaluate it on the en.id and en.ood DM test files.
import pickle

from bertsota.common.data import ParserVocabulary, DataLoader
from bertsota.common.utils import init_logger
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/bert-dm-sum'
    train_dev_bert = [
        'data/embedding/bert_large_sum/en.train.bert',
        'data/embedding/bert_large_sum/en.dev.bert'
    ]

    parser = SDPParser()
    parser.train(
        train_file='data/semeval15/en.dm.train.conllu',
        dev_file='data/semeval15/en.dm.dev.conllu',
        save_dir=save_dir,
        pretrained_embeddings_file='data/embedding/glove.6B.100d.shrinked.txt',
        bert_path=train_dev_bert)
    parser.load(save_dir)

    # The same logger is handed to both evaluation calls.
    logger = init_logger(save_dir, 'test.log')
    test_sets = (
        ('data/semeval15/en.id.dm.conllu',
         'data/embedding/bert_large_sum/en.id.bert'),
        ('data/semeval15/en.ood.dm.conllu',
         'data/embedding/bert_large_sum/en.ood.bert'),
    )
    for test_file, bert_file in test_sets:
        parser.evaluate(test_file=test_file,
                        save_dir=save_dir,
                        bert_path=bert_file,
                        logger=logger)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Train an SDPParser on data/semeval15/en.psd.conllu (validate_every=1000)
# into data/model/psd-id-eval, then load a model from data/model/psd-id and
# evaluate it on en.id.psd.conllu.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    eval_dir = 'data/model/psd-id-eval'
    id_file = 'data/semeval15/en.id.psd.conllu'

    parser = SDPParser()
    parser.train(train_file='data/semeval15/en.psd.conllu',
                 dev_file=id_file,
                 test_file=id_file,
                 save_dir=eval_dir,
                 pretrained_embeddings_file='data/embedding/glove.6B.100d.txt',
                 validate_every=1000)

    # NOTE(review): the model is loaded from 'data/model/psd-id', not from the
    # directory that train() just saved to -- confirm this is intentional.
    parser.load('data/model/psd-id')
    parser.evaluate(test_file=id_file,
                    save_dir=eval_dir,
                    num_buckets_test=10)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Train an SDPParser on data/semeval15/en.psd.conllu with beta_1=0 and
# beta_2=0.95, using en.id.psd.conllu as both the dev and the test file.
# This script only trains; the load/evaluate step is commented out below.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    id_file = 'data/semeval15/en.id.psd.conllu'

    parser = SDPParser()
    parser.train(train_file='data/semeval15/en.psd.conllu',
                 dev_file=id_file,
                 test_file=id_file,
                 save_dir='data/model/beta-psd-id',
                 beta_1=0,
                 beta_2=0.95,
                 pretrained_embeddings_file='data/embedding/glove.6B.100d.txt')
    # parser.load('data/model/psd-id')
    # parser.evaluate(test_file='data/semeval15/en.id.psd.conllu', save_dir='data/model/psd-id',
    #                 num_buckets_test=10)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Small debug run: train an SDPParser for 100 iterations with debug=True on
# news.train.debug.conllu, which is also used as the dev file and as the
# evaluation file, with a single bucket for train, validation and test.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/csdg'
    # The one debug file serves as train, dev and test input.
    debug_file = 'data/SemEval-2016/train/news.train.debug.conllu'

    parser = SDPParser()
    parser.train(
        train_file=debug_file,
        dev_file=debug_file,
        save_dir=save_dir,
        pretrained_embeddings_file='data/embedding/glove/glove.6B.100d.debug.txt',
        train_iters=100,
        num_buckets_train=1,
        num_buckets_valid=1,
        validate_every=10,
        learning_rate=2e-3,
        root='Root',
        debug=True)
    parser.load(save_dir)
    parser.evaluate(test_file=debug_file,
                    save_dir=save_dir,
                    num_buckets_test=1)
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-02-07 20:45
#
# Train an SDPParser on data/SemEval-2016/text.train.conllu with word_dims=300,
# a pretrained embeddings file and the train/valid .bert files, reload the
# saved model and evaluate it on text.test.conllu with chinese=True.
#
# NOTE: there is no `if __name__ == '__main__':` guard, so training starts
# whenever this module is executed or imported.
from bertsota.parser.dep_parser import SDPParser

save_dir = 'data/model/text-bert4'
train_valid_bert = [
    'data/embedding/bert_base_sum/text.train.bert',
    'data/embedding/bert_base_sum/text.valid.bert'
]

parser = SDPParser()
parser.train(train_file='data/SemEval-2016/text.train.conllu',
             dev_file='data/SemEval-2016/text.valid.conllu',
             save_dir=save_dir,
             word_dims=300,
             pretrained_embeddings_file='data/embedding/text.fasttext.300.txt',
             bert_path=train_valid_bert,
             root='root')
parser.load(save_dir)

# The test file and save directory are passed positionally, as in the
# original call.
test_file = 'data/SemEval-2016/text.test.conllu'
parser.evaluate(test_file,
                save_dir,
                bert_path='data/embedding/bert_base_sum/text.test.bert',
                chinese=True)
# coding: utf-8
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Train an SDPParser on data/semeval15/en.psd.conllu with mlp_rel_size=500,
# using en.id.psd.conllu as both the dev and the test file. This script only
# trains; it does not load or evaluate the model afterwards.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    id_file = 'data/semeval15/en.id.psd.conllu'

    parser = SDPParser()
    parser.train(train_file='data/semeval15/en.psd.conllu',
                 dev_file=id_file,
                 test_file=id_file,
                 mlp_rel_size=500,
                 save_dir='data/model/rel-500-psd-id',
                 pretrained_embeddings_file='data/embedding/glove.6B.100d.txt')
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-02-07 20:45
#
# Train an SDPParser on data/SemEval-2016/text.train.conllu with word_dims=300
# and a pretrained embeddings file (no bert_path is passed), reload the saved
# model and evaluate it on text.test.conllu with chinese=True.
#
# NOTE: there is no `if __name__ == '__main__':` guard, so training starts
# whenever this module is executed or imported.
from bertsota.parser.dep_parser import SDPParser

save_dir = 'data/model/text-baseline4'

parser = SDPParser()
parser.train(train_file='data/SemEval-2016/text.train.conllu',
             dev_file='data/SemEval-2016/text.valid.conllu',
             save_dir=save_dir,
             word_dims=300,
             pretrained_embeddings_file='data/embedding/text.fasttext.300.txt',
             root='root')
parser.load(save_dir)

# The test file and save directory are passed positionally, as in the
# original call.
test_file = 'data/SemEval-2016/text.test.conllu'
parser.evaluate(test_file, save_dir, chinese=True)
# coding: utf-8
# Train an SDPParser on data/semeval15/cz.pas.train.conllu with word_dims=300
# and a pretrained embeddings file (no bert_path is passed), reload the saved
# model and evaluate it on cz.id.pas.conllu.
import pickle

from bertsota.common.data import ParserVocabulary, DataLoader
from bertsota.common.utils import init_logger
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/cz6'

    parser = SDPParser()
    parser.train(train_file='data/semeval15/cz.pas.train.conllu',
                 dev_file='data/semeval15/cz.pas.dev.conllu',
                 save_dir=save_dir,
                 word_dims=300,
                 pretrained_embeddings_file='data/embedding/cz.fasttext.300.txt')
    parser.load(save_dir)

    # The logger is created from save_dir and 'test.log' and passed to the
    # evaluation call.
    logger = init_logger(save_dir, 'test.log')
    test_file = 'data/semeval15/cz.id.pas.conllu'
    parser.evaluate(test_file=test_file, save_dir=save_dir, logger=logger)
# coding: utf-8
# Train an SDPParser on data/semeval15/cz.pas.train.conllu with word_dims=300,
# a pretrained embeddings file and the train/dev .bert files, reload the saved
# model and evaluate it on cz.id.pas.conllu with its matching .bert file.
import pickle

from bertsota.common.data import ParserVocabulary, DataLoader
from bertsota.common.utils import init_logger
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    save_dir = 'data/model/cz-bert3'
    train_dev_bert = [
        'data/embedding/bert_base_sum/cz.pas.train.bert',
        'data/embedding/bert_base_sum/cz.pas.dev.bert'
    ]

    parser = SDPParser()
    parser.train(
        train_file='data/semeval15/cz.pas.train.conllu',
        dev_file='data/semeval15/cz.pas.dev.conllu',
        save_dir=save_dir,
        word_dims=300,
        pretrained_embeddings_file='data/embedding/cz.fasttext.300.txt',
        bert_path=train_dev_bert)
    parser.load(save_dir)

    # The logger is created from save_dir and 'test.log' and passed to the
    # evaluation call.
    logger = init_logger(save_dir, 'test.log')
    test_file = 'data/semeval15/cz.id.pas.conllu'
    test_bert = 'data/embedding/bert_base_sum/cz.id.pas.bert'
    parser.evaluate(test_file=test_file,
                    save_dir=save_dir,
                    bert_path=test_bert,
                    logger=logger)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Train an SDPParser using data/semeval15/en.psd.conll as the train, dev and
# test file, then load a model from data/model/dep and evaluate it on
# en.id.psd.conll, writing to data/model/over-fitting-psd-id.
from bertsota.parser.dep_parser import SDPParser

if __name__ == '__main__':
    # The same file is deliberately passed for all three training-time roles.
    psd_file = 'data/semeval15/en.psd.conll'

    parser = SDPParser()
    parser.train(train_file=psd_file,
                 dev_file=psd_file,
                 test_file=psd_file,
                 save_dir='data/model/psd-id',
                 pretrained_embeddings_file='data/embedding/glove.6B.100d.txt')

    # NOTE(review): the model is loaded from 'data/model/dep', not from the
    # 'data/model/psd-id' directory that train() just saved to, and the
    # evaluation uses a third directory -- confirm this is intentional.
    parser.load('data/model/dep')
    parser.evaluate(test_file='data/semeval15/en.id.psd.conll',
                    save_dir='data/model/over-fitting-psd-id',
                    num_buckets_test=10)