def __init__(self, csv_file, train_data=None, flag='train'):
    """Initialise the dataset from ``csv_file``.

    The CSV path is stored, checked with ``checkExistence`` and loaded via
    ``self._load_data()``. What happens next depends on ``flag``:

    * ``'train'`` -- vocabularies are derived by ``self._get_params()``.
    * ``'test'``  -- every ``<name>2id``, ``id2<name>`` and
      ``<name>_vocab_size`` attribute is taken over from ``train_data``.

    Args:
        csv_file: path of the CSV file backing this dataset.
        train_data: a ``DataSetCSV`` instance; only used when ``flag`` is
            ``'test'``.
        flag: ``'train'`` or ``'test'``.

    Raises:
        AssertionError: ``flag`` is ``'test'`` but ``train_data`` is not a
            ``DataSetCSV``.
        Exception: ``flag`` is neither ``'train'`` nor ``'test'``.
    """
    self.csv_file = csv_file
    checkExistence(self.csv_file)
    self._load_data()

    if flag != 'test':
        if flag != 'train':
            raise Exception('Unknown flag found: {}'.format(flag))
        self._get_params()
        return

    assert isinstance(train_data, DataSetCSV), \
        'train_data is not an instance of DataSetCSV'

    # Each vocabulary exposes the same three attributes; share the ones
    # held by the training set.
    vocab_names = (
        'userIntent',
        'userTag',
        'agentAct',
        'word',
        'userTagIntent',
        'userTagIntentAgentPrevAct',
    )
    attr_patterns = ('{}2id', 'id2{}', '{}_vocab_size')
    for vocab in vocab_names:
        for pattern in attr_patterns:
            attr = pattern.format(vocab)
            setattr(self, attr, getattr(train_data, attr))
def readTagPredTxt(tag_pred_txt, userTag2id, sample_nb, userTag_vocab_size):
    """Turn a file of predicted slot tags into a multi-hot indicator matrix.

    Line ``i`` of ``tag_pred_txt`` holds the whitespace-separated tags
    predicted for sample ``i``. Each tag is prefixed with ``'tag-'`` and
    looked up in ``userTag2id``; column ``id - 1`` of row ``i`` is set to 1.
    Tags missing from ``userTag2id`` set column 0 instead.

    Args:
        tag_pred_txt: path of the prediction file (one sample per line).
        userTag2id: mapping from ``'tag-<name>'`` to an integer id.
        sample_nb: number of rows of the returned matrix.
        userTag_vocab_size: number of columns of the returned matrix.

    Returns:
        ``np.ndarray`` of zeros and ones with shape
        ``(sample_nb, userTag_vocab_size)``.

    Raises:
        IndexError: the file has more lines than ``sample_nb`` or an id
            falls outside the matrix.
    """
    checkExistence(tag_pred_txt)
    indicator = np.zeros((sample_nb, userTag_vocab_size))
    # Text mode is required: in binary mode Python 3 yields bytes tokens,
    # and formatting those gives "tag-b'...'", which never matches a key
    # of userTag2id (every tag would silently fall back to column 0).
    with open(tag_pred_txt, 'r') as f:
        for idx, line in enumerate(f):
            for tag in line.split():
                tag = 'tag-{}'.format(tag)
                # ids are shifted by one to obtain the column index;
                # unknown tags are collected in column 0.
                pos = userTag2id[tag] - 1 if tag in userTag2id else 0
                indicator[idx, pos] = 1.
    return indicator
def readIntentPredTxt(intent_pred_txt, userIntent2id, sample_nb,
                      userIntent_vocab_size):
    """Turn a file of predicted intents into a multi-hot indicator matrix.

    Line ``i`` of ``intent_pred_txt`` holds the ``';'``-separated intents
    predicted for sample ``i``. The literal ``'null'`` is skipped; every
    other intent is prefixed with ``'intent-'`` and looked up in
    ``userIntent2id``; column ``id - 1`` of row ``i`` is set to 1. Intents
    missing from ``userIntent2id`` set column 0 instead.

    Args:
        intent_pred_txt: path of the prediction file (one sample per line).
        userIntent2id: mapping from ``'intent-<name>'`` to an integer id.
        sample_nb: number of rows of the returned matrix.
        userIntent_vocab_size: number of columns of the returned matrix.

    Returns:
        ``np.ndarray`` of zeros and ones with shape
        ``(sample_nb, userIntent_vocab_size)``.

    Raises:
        IndexError: the file has more lines than ``sample_nb`` or an id
            falls outside the matrix.
    """
    checkExistence(intent_pred_txt)
    indicator = np.zeros((sample_nb, userIntent_vocab_size))
    # Text mode is required: in binary mode Python 3 yields bytes lines and
    # bytes.split(';') raises TypeError because the separator is a str.
    with open(intent_pred_txt, 'r') as f:
        for idx, line in enumerate(f):
            for intent in line.strip().split(';'):
                if intent == 'null':
                    continue
                intent = 'intent-{}'.format(intent)
                # ids are shifted by one to obtain the column index;
                # unknown intents are collected in column 0.
                pos = userIntent2id[intent] - 1 if intent in userIntent2id else 0
                indicator[idx, pos] = 1.
    return indicator
 def load_model(self):
     """Restore the Keras model and the variables saved next to it.

     Reads ``graph-arch.yaml`` (architecture) and ``other_vars.npz``
     (training-time variables) from ``self.model_folder`` and the weights
     from ``self.weights_fname``.

     Sets ``self.model``, ``self.maxlen_userUtter``, ``self.word_vocab_size``,
     ``self.userTag_vocab_size``, ``self.userIntent_vocab_size``,
     ``self.id2userTag``, ``self.id2word``, ``self.id2userIntent`` and
     ``self.userTag2id``.

     Raises:
         AssertionError: ``self.model_folder`` does not exist or
             ``self.weights_fname`` is ``None``.
     """
     print('Loading model ...')
     # check existence of params
     assert os.path.exists(
         self.model_folder), 'model_folder is not found: {}'.format(
             self.model_folder)
     assert self.weights_fname is not None, 'Argument required: --weights-file'
     checkExistence(self.weights_fname)
     model_graph = '{}/graph-arch.yaml'.format(self.model_folder)
     model_train_vars = '{}/other_vars.npz'.format(self.model_folder)
     checkExistence(model_graph)
     checkExistence(model_train_vars)
     # NOTE(review): a YAML architecture file and a pickled .npz are loaded
     # below -- only point this at model folders you trust.
     from keras.models import model_from_yaml
     with open(model_graph, 'r') as fgraph:
         self.model = model_from_yaml(fgraph.read())
     self.model.load_weights(self.weights_fname)
     # allow_pickle=True: NumPy >= 1.16.3 refuses to read object arrays
     # otherwise (presumably the id2*/…2id entries are pickled dicts).
     # The context manager closes the archive, which np.load alone leaves open.
     with np.load(model_train_vars, allow_pickle=True) as npzfile:
         # [()] unwraps each stored 0-d array into the value it holds.
         self.maxlen_userUtter = np.int32(npzfile['maxlen_userUtter'][()])
         self.word_vocab_size = np.int32(npzfile['word_vocab_size'][()])
         self.userTag_vocab_size = np.int32(
             npzfile['userTag_vocab_size'][()])
         self.userIntent_vocab_size = np.int32(
             npzfile['userIntent_vocab_size'][()])
         self.id2userTag = npzfile['id2userTag'][()]
         self.id2word = npzfile['id2word'][()]
         self.id2userIntent = npzfile['id2userIntent'][()]
         self.userTag2id = npzfile['userTag2id'][()]
 def load_model(self):
     """Restore the Keras model and the variables saved next to it.

     Reads ``graph-arch.yaml`` (architecture) and ``other_vars.npz``
     (training-time variables) from ``self.model_folder`` and the weights
     from ``self.weights_fname``.

     Sets ``self.model``, ``self.agentAct_vocab_size``,
     ``self.userTagIntent_vocab_size``, ``self.id2agentAct`` and
     ``self.window_size``.

     Raises:
         AssertionError: ``self.model_folder`` does not exist, or
             ``self.threshold`` / ``self.weights_fname`` is ``None``.
     """
     print('Loading model ...')
     # check existence of params
     assert os.path.exists(self.model_folder), 'model_folder is not found: {}'.format(self.model_folder)
     assert self.threshold is not None, 'Argument required: --threshold'
     assert self.weights_fname is not None, 'Argument required: --weights-file'
     checkExistence(self.weights_fname)
     model_graph = '{}/graph-arch.yaml'.format(self.model_folder)
     model_train_vars = '{}/other_vars.npz'.format(self.model_folder)
     checkExistence(model_graph)
     checkExistence(model_train_vars)
     # load models
     # NOTE(review): a YAML architecture file and a pickled .npz are loaded
     # below -- only point this at model folders you trust.
     from keras.models import model_from_yaml
     with open(model_graph, 'r') as fgraph:
         self.model = model_from_yaml(fgraph.read())
     self.model.load_weights(self.weights_fname)
     # allow_pickle=True: NumPy >= 1.16.3 refuses to read object arrays
     # otherwise (presumably id2agentAct is a pickled dict).
     # The context manager closes the archive, which np.load alone leaves open.
     with np.load(model_train_vars, allow_pickle=True) as npzfile:
         # [()] unwraps each stored 0-d array into the value it holds.
         self.agentAct_vocab_size = np.int32(
             npzfile['agentAct_vocab_size'][()])
         self.userTagIntent_vocab_size = np.int32(
             npzfile['userTagIntent_vocab_size'][()])
         self.id2agentAct = npzfile['id2agentAct'][()]
         self.window_size = np.int32(npzfile['window_size'][()])
# Example #6
0
        action='store_true',
        help=
        'perform testing for oracle models (CRFtagger, OneVsRest SVMs) and their pipelined model if this option is activated.'
    )
    # Parse the command line, load the train/dev/test splits from the .npz
    # archive and, when training, make sure an output model folder exists.
    parser.add_argument('--model-folder',
                        dest='model_folder',
                        help='model folder')
    args = parser.parse_args()
    # Work on the parsed options as a plain dict.
    argparams = vars(args)
    train_only = argparams['train_only']
    test_only = argparams['test_only']
    # At least one of the two modes has to be requested.
    assert train_only or test_only, 'Argument required: either --train, --test, or both.'

    # load train, dev and test data
    npz_file = argparams['data_npz']
    checkExistence(npz_file)
    data_npz = np.load(npz_file)
    # presumably each entry is a 0-d object array; [()] unwraps the stored
    # object -- TODO confirm against the script that writes the archive
    train_data = data_npz['train_data'][()]
    dev_data = data_npz['dev_data'][()]
    test_data = data_npz['test_data'][()]

    ###################################################################################
    ##### Training SlotTagging, Intent Prediction, and AgentAct Prediction models #####
    ###################################################################################
    if train_only:
        # No --model-folder given: fall back to a folder named after the PID.
        if argparams['model_folder'] is None:
            pid = os.getpid()
            argparams['model_folder'] = './model/baseline_{}'.format(pid)
        # Create the model folder on first use.
        if not os.path.exists(argparams['model_folder']):
            os.makedirs(argparams['model_folder'])
    # Prepare the keyword arguments for SlotTaggingModel and either train it
    # or get ready to load a previously saved model.
    # `args` is used as a dict here (item assignment); these two options are
    # forced to None.
    args['weights_fname'] = None
    args['threshold'] = None

    # argparams = vars(args)
    argparams = args
    # print(type(argparams))
    # early stop criteria are different for two tasks, therefore one model is
    # chosen for each.
    test_tag_only = argparams['test_tag_only']
    test_intent_only = argparams['test_intent_only']
    train_only = argparams['train_only']
    # At least one of the three modes has to be requested.
    assert train_only or test_tag_only or test_intent_only, 'Arguments required: either --train, --test-tag, or --test-intent'
    # The process id is passed along with the other parameters.
    pid = os.getpid()
    argparams['pid'] = pid
    npz_fname = argparams['data_npz']
    checkExistence(npz_fname)
    data_npz = np.load(npz_fname)
    if train_only:  # train model
        # presumably each entry is a 0-d object array; [()] unwraps the
        # stored object -- TODO confirm
        argparams['train_data'] = data_npz['train_data'][()]
        argparams['dev_data'] = data_npz['dev_data'][()]
        argparams['test_data'] = None
        # Every entry of argparams becomes a keyword argument of the model.
        model = SlotTaggingModel(**argparams)
        model.train()
    else:
        # train_only is False, so (per the assert above) at least one of the
        # test flags is set
        # need to load model
        argparams['train_data'] = None
        argparams['dev_data'] = None
        argparams['test_data'] = None
        # A saved model can only be located through --model-folder.
        if argparams['model_folder'] is None:
            raise Exception('Argument required: --model-folder')
    # Parse the command line of the pipelined model, validate the weight
    # files and thresholds, and prepare the dev output folder if requested.
    parser.add_argument('--model-folder',
                        dest='model_folder',
                        help='model folder')
    args = parser.parse_args()
    # Work on the parsed options as a plain dict.
    argparams = vars(args)
    pid = os.getpid()
    npz_file = argparams['npz_file']
    intent_model_weights = argparams['intent_weights']
    tag_model_weights = argparams['tag_weights']
    act_model_weights = argparams['act_weights']
    threshold_intent = argparams['intent_threshold']
    tune_threshold = argparams['tune_threshold']
    threshold_act = argparams['act_threshold']

    # validate params
    checkExistence(npz_file)
    checkExistence(intent_model_weights)
    checkExistence(tag_model_weights)
    checkExistence(act_model_weights)
    assert threshold_intent is not None, 'Argument required: --intent-threshold'
    # Echo every option, sorted by name.
    for key in sorted(argparams.keys()):
        print('\t{}={}'.format(key, argparams[key]))

    # load test data
    data_npz = np.load(npz_file)

    if tune_threshold:
        # Dev results go to a per-process folder named after the PID.
        dev_result_folder = './model/pipe_{}/dev'.format(pid)
        if not os.path.exists(dev_result_folder):
            os.makedirs(dev_result_folder)
        print('\tdev_result_folder={}'.format(dev_result_folder))
# Example #9
0
    #     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument('--iob-file', dest='iob_file',
    #                     help='.iob file in DSTC4')
    # parser.add_argument('--csv-file', dest='csv_file',
    #                     help='the path of converted .csv file in DSTC4')
    # parser.add_argument('--root-subdialogs', dest='root_subdialogs',
    #                     help='the root directory of DSTC4 subdialogs.')
    # args = parser.parse_args()
    # iob_file = args.iob_file

    # iob_file='data/iob/dstc4.all.w-intent.train.iob'
    # checkExistence(iob_file)
    # # csv_file = args.csv_file
    # csv_file='data/csv/dstc4.all.w-intent.train.csv'
    # # root_subdialogs = args.root_subdialogs
    # root_subdialogs='data/DSTC5/data/'
    # checkExistence(root_subdialogs)

    # Convert the test-split .iob file into a .csv file, using hard-coded
    # input/output paths.
    iob_file = 'data/iob/dstc4.all.w-intent.test.iob'
    checkExistence(iob_file)
    csv_file = 'data/csv/dstc4.all.w-intent.test.csv'
    root_subdialogs = 'data/DSTC5/data/'
    checkExistence(root_subdialogs)

    # utter_search is a mapping (it is iterated with .items()); its exact
    # key/value schema is defined by transformLabelJson_another -- TODO confirm.
    utter_search = transformLabelJson_another(root_subdialogs)
    # Print every entry of the mapping.
    for k, v in utter_search.items():
        print(k, '--', v)
    # Read the IOB file with the help of the mapping, merge the records into
    # rows plus column names, and write them to csv_file.
    dct_lst = readIOB(iob_file, utter_search)
    output_lst, fieldnames = mergeCSV(dct_lst)
    writeCSV(output_lst, csv_file, fieldnames)