Ejemplo n.º 1
0
def gen_y_test(args):
    """Write the first-target labels of the test set to ``./results/y_test``.

    Loads the entity and relation vectors, builds the input and output
    vocabularies, loads the test data and writes one line per entry of
    ``targets1``: the entry's index, a tab, the target, and a newline.

    Args:
        args: Parsed options. This function reads ``ev``, ``rv``,
            ``invocab``, ``evocab``, ``revocab``, ``padding`` and
            ``test_data``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Merge both vector tables; on duplicate keys the relation entry wins.
    entityvector = loadvector(args.ev)
    relationvector = loadvector(args.rv)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')
    test2 = Data(args.test_data, input_vocab, output_vocab_entity,
                 output_vocab_relation)
    test2.load()
    targets = test2.targets1

    path = './results/y_test'
    # open() does not create missing parent directories, so make sure the
    # results folder exists before writing.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.writelines(
            str(index) + '\t' + target + '\n'
            for index, target in enumerate(targets)
        )
    print('ytest in file')
Ejemplo n.º 2
0
    def _build_dataset(self):
        """Create the generator-backed dataset and its IPU infeed queue.

        Picks the validation CSV when ``self.opts.infer`` is truthy and the
        training CSV otherwise, loads and transforms it, and wraps it in a
        ``DataGenerator``. Sets ``self.start_id``, ``self.end_id`` and
        ``self.generator`` as a side effect.

        Returns:
            A 4-tuple ``(dataset, infeed_queue, data_init, vocab)`` where
            ``data_init`` is ``infeed_queue.initializer`` and ``vocab`` is the
            ``(input_vocab, output_vocab)`` pair.
        """
        self.start_id = start_id(self.output_vocab)
        self.end_id = end_id(self.output_vocab)
        data_file = ("./data/validation.csv"
                     if self.opts.infer else "./data/training.csv")
        data = Data(data_file, self.input_vocab, self.output_vocab)
        data.load()
        transform(data)
        vocab = (self.input_vocab, self.output_vocab)
        self.generator = DataGenerator(data, vocab, self.opts, self.start_id,
                                       self.end_id)

        # Pull one sample so the dtype and shape of every field can be
        # declared to tf.data up front.
        # NOTE(review): this advances self.generator by one item before the
        # dataset is created; confirm whether that item is meant to be
        # skipped or whether calling the generator restarts it.
        sample = next(self.generator)
        output_types = {
            key: tf.dtypes.as_dtype(sample[key].dtype) for key in sample
        }
        output_shapes = {
            key: tf.TensorShape(sample[key].shape) for key in sample
        }

        dataset = tf.data.Dataset.from_generator(self.generator,
                                                 output_types=output_types,
                                                 output_shapes=output_shapes)
        infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset,
                                                       "InfeedQueue",
                                                       replication_factor=1)
        data_init = infeed_queue.initializer

        return dataset, infeed_queue, data_init, vocab
Ejemplo n.º 3
0
def testmodel(args):
    """Run the saved model on the test set and dump ranked predictions.

    Loads ``./savemodel/model1.h5``, predicts on the transformed test data,
    regroups the flattened scores into rows of ``[index, score]`` pairs (one
    row per ``output_vocab_relation.size()`` consecutive scores), sorts each
    row with ``takeSecond`` in descending order and writes two files via
    ``listinfile``:

    * ``./results/y_pre1`` - the first five entries returned by
      ``output_vocab_relation.int_to_string`` for each sorted row.
    * ``./results/y_pre2`` - the full sorted rows of ``[index, score]`` pairs.

    Args:
        args: Parsed options. This function reads ``ev``, ``rv``,
            ``invocab``, ``evocab``, ``revocab``, ``padding`` and
            ``test_data``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Merge both vector tables; on duplicate keys the relation entry wins.
    entityvector = loadvector(args.ev)
    relationvector = loadvector(args.rv)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')
    test = Data(args.test_data, input_vocab, output_vocab_entity,
                output_vocab_relation)
    test.load()
    test.transform(vector)

    print('Test Datasets Loaded.')

    model = load_model('./savemodel/model1.h5',
                       custom_objects={'AttentionLayer': AttentionLayer})
    print('Model Loaded. Start test.')
    # NOTE(review): confirm this input list matches the inputs the saved
    # model was trained with.
    prediction = model.predict([test.inputs1, test.inputs2, test.inputs3])

    flat_scores = list(prediction.flatten())
    num_relation = output_vocab_relation.size()

    # Regroup the flat score list into rows of num_relation entries, tagging
    # each score with its column index so the index survives sorting.
    row_count = len(flat_scores) // num_relation
    scored_rows = [[0] * num_relation for _ in range(row_count)]
    for flat_index, score in enumerate(flat_scores):
        row, col = divmod(flat_index, num_relation)
        scored_rows[row][col] = [col, score]

    top_labels = []
    ranked_rows = []
    for scored_row in scored_rows:
        # presumably takeSecond returns the score element of each pair, so
        # this ranks the highest score first - verify against its definition
        scored_row.sort(key=takeSecond, reverse=True)
        labels = output_vocab_relation.int_to_string(scored_row)
        top_labels.append(labels[:5])
        ranked_rows.append(scored_row)

    listinfile(top_labels, './results/y_pre1')
    listinfile(ranked_rows, './results/y_pre2')
    print('ypre1 in file')
Ejemplo n.º 4
0
def main(args):
    """Train a ``simpleNMT`` model on the configured datasets.

    Builds the human/machine vocabularies, loads and transforms the training
    and validation data, compiles the model and trains it with
    ``fit_generator``. A ``KeyboardInterrupt`` during training is caught so
    that the example run at the end still happens.

    Args:
        args: Parsed options. This function reads ``gpu``, ``padding``,
            ``training_data``, ``validation_data``, ``batch_size`` and
            ``epochs``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Vocabularies
    input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding)
    output_vocab = Vocabulary('./data/machine_vocab.json',
                              padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)
    splits = (training, validation)
    for split in splits:
        split.load()
    for split in splits:
        split.transform()

    print('Datasets Loaded.')
    print('Compiling Model.')
    model_options = {
        'pad_length': args.padding,
        'n_chars': input_vocab.size(),
        'n_labels': output_vocab.size(),
        'embedding_learnable': False,
        'encoder_units': 256,
        'decoder_units': 256,
        'trainable': True,
        'return_probabilities': False,
    }
    model = simpleNMT(**model_options)

    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', all_acc])
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    try:
        model.fit_generator(
            generator=training.generator(args.batch_size),
            steps_per_epoch=100,
            validation_data=validation.generator(args.batch_size),
            validation_steps=100,
            callbacks=[cp],
            workers=1,
            verbose=1,
            epochs=args.epochs,
        )
    except KeyboardInterrupt:
        print('Model training stopped early.')

    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
Ejemplo n.º 5
0
def main(args):
    """Entry point: build vocabularies, train ``simpleNMT``, run examples.

    The training loop can be interrupted with Ctrl+C; the interrupt is
    caught, a message is printed, and the function carries on to
    ``run_examples`` with the model as trained so far.

    Args:
        args: Parsed options. This function reads ``gpu``, ``padding``,
            ``training_data``, ``validation_data``, ``batch_size`` and
            ``epochs``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    # Dataset functions
    human_vocab_path = './data/human_vocab.json'
    machine_vocab_path = './data/machine_vocab.json'
    input_vocab = Vocabulary(human_vocab_path, padding=args.padding)
    output_vocab = Vocabulary(machine_vocab_path, padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab)
    validation = Data(args.validation_data, input_vocab, output_vocab)

    training.load()
    validation.load()

    training.transform()
    validation.transform()

    print('Datasets Loaded.')
    print('Compiling Model.')

    model = simpleNMT(
        pad_length=args.padding,
        n_chars=input_vocab.size(),
        n_labels=output_vocab.size(),
        embedding_learnable=False,
        encoder_units=256,
        decoder_units=256,
        trainable=True,
        return_probabilities=False,
    )
    model.summary()

    tracked_metrics = ['accuracy', all_acc]
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=tracked_metrics)
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    try:
        fit_options = {
            'generator': training.generator(args.batch_size),
            'steps_per_epoch': 100,
            'validation_data': validation.generator(args.batch_size),
            'validation_steps': 100,
            'callbacks': [cp],
            'workers': 1,
            'verbose': 1,
            'epochs': args.epochs,
        }
        model.fit_generator(**fit_options)
    except KeyboardInterrupt:
        print('Model training stopped early.')

    print('Model training complete.')

    run_examples(model, input_vocab, output_vocab)
Ejemplo n.º 6
0
def main(args):
    """Train a ``simpleNMT2`` model and save it to ``./savemodel/model1.h5``.

    Loads the entity and relation vectors, builds the vocabularies, loads and
    transforms the training, validation and test datasets, then compiles and
    fits the model on the training inputs with a 5% validation split. The
    model is saved whether training finishes or is interrupted with Ctrl+C.

    Args:
        args: Parsed options. This function reads ``ev``, ``rv``,
            ``invocab``, ``evocab``, ``revocab``, ``padding``,
            ``training_data``, ``validation_data``, ``test_data``, ``units``,
            ``epochs`` and ``batch_size``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Merge both vector tables; on duplicate keys the relation entry wins.
    entityvector = loadvector(args.ev)
    relationvector = loadvector(args.rv)
    vector = dict(entityvector, **relationvector)
    print('Loading vectors.')
    input_vocab = Vocabulary(args.invocab, vector, padding=args.padding)
    output_vocab_entity = Vocabulary(args.evocab,
                                     vector, padding=args.padding)
    output_vocab_relation = Vocabulary(args.revocab,
                                       vector, padding=args.padding)

    print('Loading datasets.')

    training = Data(args.training_data, input_vocab, output_vocab_entity,
                    output_vocab_relation)
    validation = Data(args.validation_data, input_vocab, output_vocab_entity,
                      output_vocab_relation)
    test = Data(args.test_data, input_vocab, output_vocab_entity,
                output_vocab_relation)
    # NOTE(review): only `training` is passed to fit() below; the validation
    # and test sets are still loaded and transformed to keep the original
    # behavior - confirm whether they are needed here.
    training.load()
    validation.load()
    test.load()
    training.transform(vector)
    validation.transform(vector)
    test.transform(vector)

    print('Datasets Loaded.')
    print('Compiling Model.')
    model = simpleNMT2(pad_length=args.padding,
                       n_chars=100,
                       entity_labels=output_vocab_entity.size(),
                       relation_labels=output_vocab_relation.size(),
                       dim=100,
                       embedding_learnable=False,
                       encoder_units=args.units,
                       decoder_units=args.units,
                       trainable=True,
                       return_probabilities=False,
                       )

    model.summary()
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Model Compiled.')
    print('Training. Ctrl+C to end early.')

    train_inputs = [training.inputs1, training.inputs2, training.inputs3,
                    training.inputs4, training.inputs5]
    try:
        model.fit(train_inputs,
                  [training.targets1],
                  epochs=args.epochs,
                  batch_size=args.batch_size,
                  validation_split=0.05)
    except KeyboardInterrupt:
        print('Model training stopped early.')

    model_path = './savemodel/model1.h5'
    # Make sure the target folder exists so a finished training run is not
    # lost to a missing directory at save time.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model.save(model_path)
    print('Model training complete.')