Beispiel #1
0
# Fix the CPU-side random number generators of PyTorch and NumPy to seed 1.
torch.manual_seed(1)
np.random.seed(1)

# Softmax module that normalises along dimension 1.
Softmax = nn.Softmax(dim=1)

# Only --config_file is declared on the parser; every argument it does not
# recognise is collected in `extra_args` and passed on to Configurable.
argparser = argparse.ArgumentParser()
argparser.add_argument('--config_file', default='../configs/default.cfg')
args, extra_args = argparser.parse_known_args()
config = Configurable(args.config_file, extra_args)

use_cuda = config.use_cuda

# The CUDA generator is seeded separately (with the same value, 1) when the
# configuration enables GPU use.
if use_cuda:
    torch.cuda.manual_seed(1)

# Load the train / dev / test corpora, in that order, with identical
# separator and schema settings.
train_corpus, dev_corpus, test_corpus = [
    utils.load_entity_and_relation_sequences(corpus_path, sep="\t", schema=config.schema)
    for corpus_path in (config.train_file, config.dev_file, config.test_file)
]

def load_json_file(filename):
    """Read a JSON document from disk and return the decoded object.

    The file is opened explicitly as UTF-8 so that decoding does not depend
    on the locale's default encoding of the machine running the script.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

# Choose where the `*_lstm_h` lists come from.
if config.parse_lstm_size == 0:
    # Size 0: build one independent 2x2 zero placeholder per item of each
    # corpus instead of reading anything from disk.
    train_lstm_h, dev_lstm_h, test_lstm_h = [
        [[[0.0, 0.0], [0.0, 0.0]] for _ in range(len(split_corpus))]
        for split_corpus in (train_corpus, dev_corpus, test_corpus)
    ]
else:
    # Non-zero size: read the three JSON files named in the configuration.
    train_lstm_h = load_json_file(config.parse_train_file)
    dev_lstm_h = load_json_file(config.parse_dev_file)
    test_lstm_h = load_json_file(config.parse_test_file)
#  argparser.add_argument('--model', default='BaseParser')
# Second configuration pass: parse the command line again and rebuild `config`
# from the --config_file option, overwriting the `args`, `extra_args`,
# `config` and `use_cuda` values assigned earlier in this file.
args, extra_args = argparser.parse_known_args()
config = Configurable(args.config_file, extra_args)

use_cuda = config.use_cuda

# Seed the CUDA random number generator as well when the configuration enables
# GPU use (same seed value, 1, as the CPU seeding at the top of the file).
if use_cuda:
    torch.cuda.manual_seed(1)
# Number of domains = number of entries in the configured training-file list.
domain_num = len(config.train_file_list)
# Length cut-off; compared against len(e[0]) when the corpora are filtered below.
max_sent_len = config.max_sent_len

# Starts empty; presumably one `corpus` dict per domain is appended further
# down in the loop -- that part is not visible in this excerpt, confirm.
dom2corpus = []
for i in range(domain_num):
    # Everything belonging to domain i is gathered in a single dict.
    corpus = {}
    # Load the i-th train / dev / test file from the configured file lists,
    # all with a tab separator and the configured schema.
    corpus['train'] = utils.load_entity_and_relation_sequences(config.train_file_list[i], sep="\t", schema=config.schema)
    corpus['dev'] = utils.load_entity_and_relation_sequences(config.dev_file_list[i], sep="\t", schema=config.schema)
    corpus['test'] = utils.load_entity_and_relation_sequences(config.test_file_list[i], sep="\t", schema=config.schema)

    # Keep only examples whose first element has at most max_sent_len items
    # (e[0] is presumably the token sequence -- TODO confirm against utils).
    corpus['train'] = [e for e in corpus['train'] if len(e[0]) <= max_sent_len]
    corpus['dev'] = [e for e in corpus['dev'] if len(e[0]) <= max_sent_len]
    corpus['test'] = [e for e in corpus['test'] if len(e[0]) <= max_sent_len]

    # Fresh vocabulary objects for this domain. The PAD and `lower` arguments
    # are passed through to vocab.Vocab; their exact effect is defined there
    # (presumably the padding symbol and case-folding -- verify in vocab.py).
    corpus['word_vocab'] = vocab.Vocab("words", PAD="<PAD>", lower=True)
    corpus['char_vocab'] = vocab.Vocab("chars", PAD="<p>", lower=False)
    corpus['ent_span_vocab'] = vocab.Vocab("ent_spans", lower=False)
    corpus['chunk_vocab'] = vocab.Vocab("chunk_tags", lower=False)
    corpus['rel_vocab'] = vocab.Vocab("rel_tags", PAD="None", lower=False)

    utils.create_vocab(corpus['train'] + corpus['dev'] + corpus['test'],
                       [corpus['word_vocab'], corpus['ent_span_vocab'], corpus['chunk_vocab']],