# full path to data will be: ./data + dataset + train/test/valid
import os

# createVocabulary/loadVocabulary are assumed to come from the project's
# utils module.
from utils import createVocabulary, loadVocabulary

if arg.dataset is None:
    print('name of dataset cannot be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)

full_train_path = os.path.join('./data', arg.dataset, arg.train_data_path)
full_test_path = os.path.join('./data', arg.dataset, arg.test_data_path)
full_valid_path = os.path.join('./data', arg.dataset, arg.valid_data_path)

# Build the word, slot and intent vocabularies from the training split,
# then load them back for token indexing.
createVocabulary(os.path.join(full_train_path, arg.input_file),
                 os.path.join(arg.vocab_path, 'in_vocab'))
createVocabulary(os.path.join(full_train_path, arg.slot_file),
                 os.path.join(arg.vocab_path, 'slot_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file),
                 os.path.join(arg.vocab_path, 'intent_vocab'))

in_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'in_vocab'))
slot_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'slot_vocab'))
intent_vocab = loadVocabulary(os.path.join(arg.vocab_path, 'intent_vocab'))
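# For concreteness, the joins above resolve like this (values are
# illustrative; the file name 'seq.in' is not taken from this listing):
_example = os.path.join('./data', 'atis', 'train', 'seq.in')
print(_example)  # -> ./data/atis/train/seq.in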
import tensorflow as tf  # TF 1.x: tf.contrib is required by the cells below


def createModel(input_data, input_size, sequence_length, slot_size,
                intent_size, layer_size=128, isTraining=True):
    # Bidirectional LSTM encoder; dropout wrappers are applied only while
    # training.
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                                output_keep_prob=0.5)
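    # --- Assumed continuation (the original listing is cut off above) ---
    # A typical TF 1.x encoder of this shape follows the dropout wrappers
    # with an embedding lookup and a bidirectional unroll. This is a sketch,
    # not necessarily the repository's actual code; the names 'embedding'
    # and 'state_outputs' are illustrative.
    embedding = tf.get_variable('embedding', [input_size, layer_size])
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, inputs,
        sequence_length=sequence_length, dtype=tf.float32)
    # Concatenate forward/backward outputs -> [batch, time, 2 * layer_size].
    state_outputs = tf.concat(state_outputs, 2)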
# Extra label vocabularies used by the two-intent (multi-intent) variant
# of the dataset.
createVocabulary(os.path.join(full_train_path, arg.intent_file_one),
                 os.path.join(arg.vocab_path, 'intent_one_vocab'))
createVocabulary(os.path.join(full_train_path, arg.intent_file_two),
                 os.path.join(arg.vocab_path, 'intent_two_vocab'))

intent_one_vocab = loadVocabulary(
    os.path.join(arg.vocab_path, 'intent_one_vocab'))
intent_two_vocab = loadVocabulary(
    os.path.join(arg.vocab_path, 'intent_two_vocab'))
# Print arguments
for k, v in sorted(vars(arg).items()):
    print(k, '=', v)

# Choose the model variant: 'full' keeps the slot attention, 'intent_only'
# removes it; both add the encoder's final state to the intent head.
if arg.model_type == 'full':
    add_final_state_to_intent = True
    remove_slot_attn = False
elif arg.model_type == 'intent_only':
    add_final_state_to_intent = True
    remove_slot_attn = True
else:
    print('unknown model type!')
    exit(1)
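# A minimal, self-contained illustration of the flag this expects. The flag
# name '--model_type' and the default 'full' are assumptions for the demo,
# not read from this file:
import argparse

_demo = argparse.ArgumentParser()
_demo.add_argument('--model_type', default='full',
                   choices=['full', 'intent_only'])
print(_demo.parse_args(['--model_type', 'intent_only']).model_type)
# -> intent_only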
import logging

# Triplet-model variant: data lives under ../input_data rather than ./data.
if arg.dataset is None:
    print('name of dataset cannot be None')
    exit(1)
elif arg.dataset == 'snips':
    print('use snips dataset')
elif arg.dataset == 'atis':
    print('use atis dataset')
else:
    print('use own dataset: ', arg.dataset)

full_train_path = os.path.join('../input_data', arg.dataset,
                               arg.train_data_path, arg.input_file)
full_valid_path = os.path.join('../input_data', arg.dataset,
                               arg.valid_data_path, arg.input_file)
full_test_path = os.path.join('../input_data', arg.dataset,
                              arg.test_data_path, arg.input_file)
full_inference_path = os.path.join('../input_data', arg.dataset,
                                   arg.test_data_path, arg.inference_file)
full_inference_label_path = os.path.join('../input_data', arg.dataset,
                                         arg.test_data_path,
                                         arg.inference_label_file)

# Build the input vocabulary from the pretrained-embedding file, keeping
# explicit padding and unknown tokens.
createVocabulary(os.path.join('../input_data', arg.dataset, arg.embed_path),
                 os.path.join('../input_data', arg.dataset, 'in_vocab'),
                 pad=True, unk=True)
in_vocab = loadVocabulary(os.path.join('../input_data', arg.dataset,
                                       'in_vocab'))
logging.info('vocab created')

# Create Training Model
with tf.variable_scope('triplet_model'):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    model = TripletModel(arg)
    outputs = model.build_model()

with tf.variable_scope('loss'):
    # Hinge loss on cosine similarities: drive the anchor-positive
    # similarity (cos_ap) above the anchor-negative similarity (cos_an)
    # by at least arg.margin.
    cos_an, cos_ap = outputs
    loss = tf.maximum(0.0, cos_an - cos_ap + arg.margin)

params = tf.trainable_variables()
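# Worked example of the hinge above in plain numpy. The vectors and the
# margin value 0.2 (a stand-in for arg.margin) are illustrative only:
import numpy as np

def _cos(u, v):
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))

anchor = np.array([1.0, 0.0])
positive = np.array([0.9, 0.1])
negative = np.array([0.1, 0.9])
cos_ap = _cos(anchor, positive)         # ~0.994
cos_an = _cos(anchor, negative)         # ~0.110
print(max(0.0, cos_an - cos_ap + 0.2))  # 0.0: pair already split by > margin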