Example No. 1
# full path to data will be: ./data + dataset + train/test/valid
if arg.dataset is None:
    print("name of dataset cannot be None")
    exit(1)
elif arg.dataset == "snips":
    print("use snips dataset")
elif arg.dataset == "atis":
    print("use atis dataset")
else:
    print("use own dataset: ", arg.dataset)
full_train_path = os.path.join("./data", arg.dataset, arg.train_data_path)
full_test_path = os.path.join("./data", arg.dataset, arg.test_data_path)
full_valid_path = os.path.join("./data", arg.dataset, arg.valid_data_path)

createVocabulary(
    os.path.join(full_train_path, arg.input_file),
    os.path.join(arg.vocab_path, "in_vocab"),
)
createVocabulary(
    os.path.join(full_train_path, arg.slot_file),
    os.path.join(arg.vocab_path, "slot_vocab"),
)
createVocabulary(
    os.path.join(full_train_path, arg.intent_file),
    os.path.join(arg.vocab_path, "intent_vocab"),
)

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, "in_vocab"))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, "slot_vocab"))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, "intent_vocab"))
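
None of these snippets define createVocabulary or loadVocabulary, so a rough sketch of the helpers they assume is given below. Everything in it is a guess for illustration: the file format (one whitespace-tokenized sample per line) and the special tokens _PAD and _UNK are assumptions, not taken from the examples.

# Hypothetical helpers matching the calls in the snippets; the real
# implementations may differ. no_pad/pad/unk mirror the keyword arguments
# that appear in the examples below.
def createVocabulary(input_path, output_path, no_pad=False, pad=True, unk=True):
    counts = {}
    with open(input_path, encoding='utf-8') as f:
        for line in f:
            for token in line.strip().split():
                counts[token] = counts.get(token, 0) + 1
    specials = []
    if pad and not no_pad:
        specials.append('_PAD')  # padding token keeps a fixed low id
    if unk:
        specials.append('_UNK')  # fallback for out-of-vocabulary tokens
    with open(output_path, 'w', encoding='utf-8') as f:
        for token in specials + sorted(counts, key=counts.get, reverse=True):
            f.write(token + '\n')

def loadVocabulary(path):
    with open(path, encoding='utf-8') as f:
        rev = [line.strip() for line in f]
    return {'vocab': {w: i for i, w in enumerate(rev)}, 'rev': rev}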

Example No. 2

if arg.dataset is None:
    print('name of dataset cannot be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('./data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('./data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('./data', arg.dataset, arg.valid_data_path)

createVocabulary(os.path.join(full_train_path, arg.input_file),
                 os.path.join(arg.vocab_path, 'in_vocab'))
createVocabulary(os.path.join(full_train_path, arg.slot_file),
                 os.path.join(arg.vocab_path, 'slot_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file),
                 os.path.join(arg.vocab_path, 'intent_vocab'),
                 no_pad=True)

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'intent_vocab'))


def createModel(input_data,
                input_size,
                sequence_length,
                slots,
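
Note that this variant passes no_pad=True when building the intent vocabulary: intents label the whole utterance rather than individual tokens, so unlike the word and slot vocabularies, the intent vocabulary never needs a padding entry.
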
Example No. 3
# full path to data will be: ./data + dataset + train/test/valid
if arg.dataset is None:
    print('name of dataset cannot be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('./data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('./data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('./data', arg.dataset, arg.valid_data_path)

createVocabulary(os.path.join(full_train_path, arg.input_file), os.path.join(arg.vocab_path, 'in_vocab'))
createVocabulary(os.path.join(full_train_path, arg.slot_file), os.path.join(arg.vocab_path, 'slot_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file), os.path.join(arg.vocab_path, 'intent_vocab'))

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'intent_vocab'))

def createModel(input_data, input_size, sequence_length, slot_size, intent_size, layer_size=128, isTraining=True):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
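
The snippet cuts off inside createModel. Under TensorFlow 1.x, the two wrapped cells are typically fed embedded inputs and combined with a bidirectional dynamic RNN; the continuation below is a plausible sketch, not the original code (the embedding size and the concatenation layout are assumptions):

    # Sketch of a typical TF 1.x continuation; names and sizes are guesses.
    embedding = tf.get_variable('embedding', [input_size, layer_size])
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs,
        sequence_length=sequence_length, dtype=tf.float32)
    state_outputs = tf.concat(outputs, 2)  # per-token features, for slot tags
    final_state = tf.concat(               # whole-utterance features, for intent
        [final_states[0].h, final_states[1].h], 1)
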
Example No. 4
# full path to data will be: ./data + dataset + train/test/valid
if arg.dataset is None:
    print('name of dataset cannot be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)
full_train_path = os.path.join('./data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('./data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('./data', arg.dataset, arg.valid_data_path)

createVocabulary(os.path.join(full_train_path, arg.input_file),
                 os.path.join(arg.vocab_path, 'in_vocab'))
createVocabulary(os.path.join(full_train_path, arg.slot_file),
                 os.path.join(arg.vocab_path, 'slot_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file),
                 os.path.join(arg.vocab_path, 'intent_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file_one),
                 os.path.join(arg.vocab_path, 'intent_one_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file_two),
                 os.path.join(arg.vocab_path, 'intent_two_vocab'))

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'intent_vocab'))
intent_one_vocab = loadVocabulary(
    os.path.join(arg.vocab_path, 'intent_one_vocab'))
intent_two_vocab = loadVocabulary(
    os.path.join(arg.vocab_path, 'intent_two_vocab'))
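
Unlike the earlier variants, this one builds two additional vocabularies, intent_one_vocab and intent_two_vocab, from separate intent files, which points to a multi-intent setup where a single utterance can carry two intent labels at once.
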
Example No. 5
# Print arguments
for k, v in sorted(vars(arg).items()):
    print(k, '=', v)

if arg.model_type == 'full':
    add_final_state_to_intent = True
    remove_slot_attn = False
elif arg.model_type == 'intent_only':
    add_final_state_to_intent = True
    remove_slot_attn = True
else:
    print('unknown model type!')
    exit(1)

createVocabulary(os.path.join(arg.train_data_path, arg.input_file),
                 os.path.join(arg.vocab_path, 'in_vocab'))
createVocabulary(os.path.join(arg.train_data_path, arg.slot_file),
                 os.path.join(arg.vocab_path, 'slot_vocab'))
createVocabulary(os.path.join(arg.train_data_path, arg.intent_file),
                 os.path.join(arg.vocab_path, 'intent_vocab'))

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'intent_vocab'))


def createModel(input_data,
                input_size,
                sequence_length,
                slot_size,
                intent_size,
Example No. 6
else:
    print('use own dataset: ', arg.dataset)

full_train_path = os.path.join('../input_data', arg.dataset,
                               arg.train_data_path, arg.input_file)
full_valid_path = os.path.join('../input_data', arg.dataset,
                               arg.valid_data_path, arg.input_file)
full_test_path = os.path.join('../input_data', arg.dataset, arg.test_data_path,
                              arg.input_file)
full_inference_path = os.path.join('../input_data', arg.dataset,
                                   arg.test_data_path, arg.inference_file)
full_inference_label_path = os.path.join('../input_data', arg.dataset,
                                         arg.test_data_path,
                                         arg.inference_label_file)
createVocabulary("../input_data/" + arg.dataset + "/" + arg.embed_path,
                 "../input_data/" + arg.dataset + "/in_vocab",
                 pad=True,
                 unk=True)
in_vocab = loadVocabulary("../input_data/" + arg.dataset + "/in_vocab")
logging.info("vocab created")

# Create Training Model
with tf.variable_scope('triplet_model'):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    model = TripletModel(arg)
    outputs = model.build_model()

with tf.variable_scope('loss'):
    cos_an, cos_ap = outputs
    loss = tf.maximum(0.0, cos_an - cos_ap + arg.margin)

params = tf.trainable_variables()
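
Here the loss is a cosine triplet margin loss: it pushes the anchor-positive similarity cos_ap above the anchor-negative similarity cos_an by at least arg.margin. The snippet stops after collecting the trainable variables; the usual TF 1.x next step builds the update op from loss, params, and global_step, roughly as in this sketch (the optimizer choice, arg.learning_rate, and the clipping norm are assumptions, not from the original):

# Sketch of a typical TF 1.x training op; hyperparameters are guesses.
opt = tf.train.AdamOptimizer(learning_rate=arg.learning_rate)
gradients = tf.gradients(loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
train_op = opt.apply_gradients(zip(clipped_gradients, params),
                               global_step=global_step)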