def test_with_sample_input(self):
        """ User passes correct args, should pass

        Trains a workspace from the sample intents and entities, runs the
        script under test against the sample test file, and fails the test
        case if either subprocess exits non-zero or any exception is raised
        along the way. The trained workspace is always deleted once its id
        is known.
        """
        raised = False

        config_path = os.path.join(tool_base, 'config.ini')
        config = configparser.ConfigParser()
        config.read(config_path)

        username = config[WCS_CREDS_SECTION][WCS_USERNAME_ITEM]
        password = config[WCS_CREDS_SECTION][WCS_PASSWORD_ITEM]

        intent_path = os.path.join(tool_base, 'resources', 'sample',
                                   'intents.csv')
        entity_path = os.path.join(tool_base, 'resources', 'sample',
                                   'entities.csv')

        train_args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i', intent_path, '-e',
            entity_path, '-u', username, '-p', password
        ]

        workspace_spec_json = os.path.join(self.test_dir, SPEC_FILENAME)

        print('Begin training')
        # The training script's stdout is captured as the workspace spec.
        with open(workspace_spec_json, 'w+') as f:
            returncode = subprocess.run(train_args, stdout=f).returncode

        try:
            with open(workspace_spec_json, 'r') as f:
                if returncode != 0:
                    # Show whatever the training script emitted before
                    # reporting the failure.
                    print(f.read())
                    raise Exception('Training failed')
                print('Training complete')
                workspace_id = json.load(f)[WORKSPACE_ID_TAG]

            try:
                test_in_path = os.path.join(tool_base, 'resources',
                                            'sample', 'test.csv')
                test_out_path = os.path.join(self.test_dir, 'test-out.csv')
                test_args = [
                    sys.executable, self.script_path, '-i', test_in_path,
                    '-o', test_out_path, '-w', workspace_id, '-t',
                    'utterance', '-m', '-g', 'golden intent', '-u',
                    username, '-p', password, '-r',
                    str(DEFAULT_TEST_RATE)
                ]
                print('Begin testing')
                returncode = subprocess.run(test_args).returncode
            finally:
                # Delete the workspace even when launching the test
                # subprocess itself raises, so it is never leaked.
                delete_workspaces(username, password, [workspace_id])

            if returncode != 0:
                raise Exception('Testing failed')
            print('Testing completed')

        except Exception as e:
            print(e)
            raised = True
        self.assertFalse(raised, 'Exception raised')
Ejemplo n.º 2
0
    def test_std(self):
        """ User passes correct standard test config, should pass

        Trains a workspace from fold 0 of the sample k-fold data, writes a
        standard-test config file under the test directory, runs the script
        under test with that config, and asserts that it exits with status
        0. The training workspace is deleted afterwards, even if a later
        step raises.
        """
        raised = False

        std_test_dir = os.path.join(self.test_dir, STANDARD_TEST)
        if not os.path.exists(std_test_dir):
            os.makedirs(std_test_dir)

        # Use fold 0 training input as intent
        intent_path = os.path.join(tool_base, 'resources', 'sample', 'kfold',
                                   '0', 'train.csv')
        entity_path = os.path.join(tool_base, 'resources', 'sample',
                                   'entities.csv')

        username = self.config[WCS_CREDS_SECTION][WCS_USERNAME_ITEM]
        password = self.config[WCS_CREDS_SECTION][WCS_PASSWORD_ITEM]

        args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i', intent_path, '-e',
            entity_path, '-u', username, '-p', password
        ]

        workspace_spec_json = os.path.join(self.test_dir, SPEC_FILENAME)
        # Train a new instance in order to pull the workspace detail
        print('Begin training for setting up environment')
        with open(workspace_spec_json, 'w+') as f:
            if subprocess.run(args, stdout=f).returncode != 0:
                print('Training failed')
                # The child wrote through this same handle, so the file
                # offset sits after its output; rewind before reading.
                f.seek(0)
                print(f.read())
                raise Exception('Training failed')

        with open(workspace_spec_json, 'r') as f:
            self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM] = \
                json.load(f)[WORKSPACE_ID_TAG]

        try:
            test_input_path = os.path.join(tool_base, 'resources', 'sample',
                                           'kfold', '0', 'test.csv')

            self.config[DEFAULT_SECTION][MODE_ITEM] = STANDARD_TEST
            self.config[DEFAULT_SECTION][TEMP_DIR_ITEM] = std_test_dir
            self.config[DEFAULT_SECTION][TEST_FILE_ITEM] = test_input_path
            self.config[DEFAULT_SECTION][TEST_OUT_PATH_ITEM] = \
                os.path.join(std_test_dir, 'test-out.csv')

            std_config_path = os.path.join(std_test_dir, 'config.ini')

            with open(std_config_path, 'w') as configfile:
                self.config.write(configfile)

            args = [sys.executable, self.script_path, '-c', std_config_path]

            if subprocess.run(args).returncode != 0:
                raised = True
        finally:
            # Always remove the workspace created above so a failure in the
            # steps above does not leak it.
            delete_workspaces(
                username, password,
                [self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM]])

        self.assertFalse(raised, 'Exception raised')
Ejemplo n.º 3
0
    def test_kfold(self):
        """ User passes correct kfold config, should pass

        Trains a workspace from the sample intents and entities, writes a
        k-fold config file under the test directory, runs the script under
        test with that config, and asserts that it exits with status 0.
        The training workspace is deleted afterwards, even if a later step
        raises.
        """
        raised = False

        kfold_test_dir = os.path.join(self.test_dir, KFOLD)
        if not os.path.exists(kfold_test_dir):
            os.makedirs(kfold_test_dir)

        intent_path = os.path.join(tool_base, 'resources', 'sample',
                                   'intents.csv')
        entity_path = os.path.join(tool_base, 'resources', 'sample',
                                   'entities.csv')

        username = self.config[WCS_CREDS_SECTION][WCS_USERNAME_ITEM]
        password = self.config[WCS_CREDS_SECTION][WCS_PASSWORD_ITEM]

        args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i', intent_path, '-e',
            entity_path, '-u', username, '-p', password
        ]

        workspace_spec_json = os.path.join(self.test_dir, SPEC_FILENAME)
        # Train a new instance in order to pull the workspace detail
        print('Begin training for setting up environment')
        with open(workspace_spec_json, 'w+') as f:
            if subprocess.run(args, stdout=f).returncode != 0:
                print('Training failed')
                # The child wrote through this same handle, so the file
                # offset sits after its output; rewind before reading.
                f.seek(0)
                print(f.read())
                raise Exception('Training failed')

        with open(workspace_spec_json, 'r') as f:
            self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM] = \
                json.load(f)[WORKSPACE_ID_TAG]

        try:
            self.config[DEFAULT_SECTION][MODE_ITEM] = KFOLD
            self.config[DEFAULT_SECTION][TEMP_DIR_ITEM] = kfold_test_dir
            self.config[DEFAULT_SECTION][FIGURE_PATH_ITEM] = \
                os.path.join(kfold_test_dir, 'figure.png')
            self.config[DEFAULT_SECTION][FOLD_NUM_ITEM] = \
                str(FOLD_NUM_DEFAULT)
            self.config[DEFAULT_SECTION][PARTIAL_CREDIT_TABLE_ITEM] = \
                os.path.join(tool_base, 'resources', 'sample',
                             'partial-credit-table.csv')

            kfold_config_path = os.path.join(kfold_test_dir, 'config.ini')

            with open(kfold_config_path, 'w') as configfile:
                self.config.write(configfile)

            args = [sys.executable, self.script_path, '-c', kfold_config_path]

            if subprocess.run(args).returncode != 0:
                raised = True
        finally:
            # Always remove the workspace created above so a failure in the
            # steps above does not leak it.
            delete_workspaces(
                username, password,
                [self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM]])

        self.assertFalse(raised, 'Exception raised')
Ejemplo n.º 4
0
def test(out_dir, intent_train_file, workspace_base_file, test_out_path,
         test_input_file, keep_workspace, username, password, iam_apikey, version, url):
    """Train a workspace and run the standard test against it.

    The run settings are echoed to stdout, the test input's header is
    checked for an utterance column, a workspace is trained by the
    training script (its stdout is stored as the workspace spec JSON under
    ``out_dir``), and the testing script is then run against that
    workspace. Unless ``keep_workspace`` is truthy the workspace is deleted
    afterwards, whether or not testing succeeded.

    Raises:
        ValueError: the test input has several columns and none of them is
            the utterance column.
        RuntimeError: the training or testing subprocess exits non-zero.
    """
    show = '{}={}'.format
    print('Begin {} with following details:'.format(STANDARD_TEST.upper()))
    print(show(INTENT_FILE_ITEM, intent_train_file))
    print(show(WORKSPACE_BASE_ITEM, workspace_base_file))
    print(show(TEST_FILE_ITEM, test_input_file))
    print(show(TEST_OUT_PATH_ITEM, test_out_path))
    print(show(OUT_DIR_ITEM, out_dir))
    print(show(DO_KEEP_WORKSPACE_ITEM, BOOL_MAP[keep_workspace]))
    print(show(WCS_USERNAME_ITEM, username))
    print(show(WCS_BASEURL_ITEM, url))
    print(show(WA_API_VERSION_ITEM, version))

    # Validate test file: a named utterance column is passed on explicitly;
    # a single unnamed column is accepted as-is.
    test_frame = pd.read_csv(test_input_file, quoting=csv.QUOTE_ALL,
                             encoding=UTF_8)
    columns = list(test_frame.columns.values)
    if UTTERANCE_COLUMN in columns:
        extra_params = ['-t', UTTERANCE_COLUMN]
    elif len(columns) == 1:
        extra_params = []
    else:
        raise ValueError('Test input has unknown utterance column')

    # Run standard test
    working_dir = os.path.join(out_dir, STANDARD_TEST)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    workspace_spec_json = os.path.join(working_dir, SPEC_FILENAME)
    train_args = [
        sys.executable, TRAIN_CONVERSATION_PATH,
        '-i', intent_train_file,
        '-n', 'standard test', '-v', version,
        '-u', username, '-p', password, '-a', iam_apikey, '-l', url,
        '-w', workspace_base_file,
    ]
    with open(workspace_spec_json, 'w') as spec_out:
        train_status = subprocess.run(train_args, stdout=spec_out).returncode
        if train_status != 0:
            raise RuntimeError('Failure in training workspace')
        print('Trained standard test workspace')

    with open(workspace_spec_json, 'r') as spec_in:
        workspace_id = json.load(spec_in)[WORKSPACE_ID_TAG]

    try:
        test_args = [
            sys.executable, TEST_CONVERSATION_PATH,
            '-i', test_input_file,
            '-o', test_out_path, '-m', '-v', version,
            '-u', username, '-p', password, '-a', iam_apikey, '-l', url,
            '-w', workspace_id,
            '-r', str(MAX_TEST_RATE),
        ] + extra_params
        if subprocess.run(test_args).returncode != 0:
            raise RuntimeError('Failure in testing data')
        print('Tested workspace')
    finally:
        if not keep_workspace:
            delete_workspaces(username, password, iam_apikey, url, version, [workspace_id])
Ejemplo n.º 5
0
    def test_with_sample_input(self):
        """ User passes correct args, should pass

        Trains a workspace from the sample intents and entities, then calls
        ``workspaceParser.func`` with arguments parsed for that workspace
        and asserts that it raises no exception. The training workspace is
        deleted afterwards, even if argument parsing or the call fails.
        """
        raised = False

        intent_path = os.path.join(tool_base, 'resources', 'sample',
                                   'intents.csv')
        entity_path = os.path.join(tool_base, 'resources', 'sample',
                                   'entities.csv')

        config_path = os.path.join(tool_base, 'config.ini')
        config = configparser.ConfigParser()
        config.read(config_path)

        username = config[WCS_CREDS_SECTION][WCS_USERNAME_ITEM]
        password = config[WCS_CREDS_SECTION][WCS_PASSWORD_ITEM]

        args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i', intent_path, '-e',
            entity_path, '-u', username, '-p', password
        ]

        workspace_spec_json = os.path.join(self.test_dir, SPEC_FILENAME)
        # Train a new instance in order to pull the workspace detail
        print('Begin training for setting up environment')
        with open(workspace_spec_json, 'w+') as f:
            if subprocess.run(args, stdout=f).returncode != 0:
                print('Training failed')
                # The child wrote through this same handle, so the file
                # offset sits after its output; rewind before reading.
                f.seek(0)
                print(f.read())
                raise Exception('Training failed')

        with open(workspace_spec_json, 'r') as f:
            workspace_id = json.load(f)[WORKSPACE_ID_TAG]

        try:
            args = \
                self.parser.parse_args(
                    ['-w', workspace_id, '-u', username,
                     '-p', password, '-o', self.test_dir])
            try:
                workspaceParser.func(args)
            except Exception as e:
                print(e)
                raised = True
        finally:
            # Runs even if parse_args exits or a non-Exception error
            # escapes, so the workspace is never leaked.
            delete_workspaces(username, password, [workspace_id])

        self.assertFalse(raised, 'Exception raised')
Ejemplo n.º 6
0
def kfold(fold_num, out_dir, intent_train_file, workspace_base_file,
          figure_path, keep_workspace, username, password, iam_apikey, url, version, weight_mode,
          conf_thres, partial_credit_table):
    """Run a k-fold test and generate its reports.

    Steps, each delegated to a helper script run as a subprocess:

    1. Split ``intent_train_file`` into ``fold_num`` folds under
       ``<out_dir>/<KFOLD>``.
    2. Train one workspace per fold in parallel; each training process
       writes its workspace spec JSON to the fold's spec file.
    3. Test every fold in parallel against its workspace.
    4. Add a 'Fold Index' column to each fold's output and write the union
       of all outputs to ``KFOLD_UNION_FILE`` in ``out_dir``.
    5. Plot precision curves, then generate intent metrics and a confusion
       matrix from the union file.

    Unless ``keep_workspace`` is truthy, the workspaces of all folds whose
    training succeeded are deleted at the end, even on failure.

    Raises:
        RuntimeError: any of the helper subprocesses exits non-zero.
    """
    FOLD_TRAIN = 'fold_train'
    FOLD_TEST = 'fold_test'
    WORKSPACE_SPEC = 'fold_workspace'
    WORKSPACE_NAME = 'workspace_name'
    TEST_OUT = 'test_out'

    print('Begin {} with following details:'.format(KFOLD.upper()))
    print('{}={}'.format(INTENT_FILE_ITEM, intent_train_file))
    print('{}={}'.format(WORKSPACE_BASE_ITEM, workspace_base_file))
    print('{}={}'.format(FIGURE_PATH_ITEM, figure_path))
    print('{}={}'.format(OUT_DIR_ITEM, out_dir))
    print('{}={}'.format(FOLD_NUM_ITEM, fold_num))
    print('{}={}'.format(DO_KEEP_WORKSPACE_ITEM, BOOL_MAP[keep_workspace]))
    print('{}={}'.format(WEIGHT_MODE_ITEM, weight_mode))
    print('{}={}'.format(CONF_THRES_ITEM, conf_thres))
    print('{}={}'.format(WCS_USERNAME_ITEM, username))
    print('{}={}'.format(WCS_BASEURL_ITEM, url))
    print('{}={}'.format(WA_API_VERSION_ITEM, version))
    print('{}={}'.format(PARTIAL_CREDIT_TABLE_ITEM, partial_credit_table))

    working_dir = os.path.join(out_dir, KFOLD)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    # Prepare folds
    if subprocess.run([sys.executable, CREATE_TEST_TRAIN_FOLDS_PATH,
                       '-i', intent_train_file, '-o', working_dir,
                       '-k', str(fold_num)],
                      stdout=subprocess.PIPE).returncode == 0:
        print('Created {} folds'.format(str(fold_num)))
    else:
        raise RuntimeError('Failure in folds creation')

    # Construct fold params
    fold_params = [{FOLD_TRAIN: os.path.join(working_dir, str(idx),
                                             TRAIN_FILENAME),
                    FOLD_TEST: os.path.join(working_dir, str(idx),
                                            TEST_FILENAME),
                    TEST_OUT: os.path.join(working_dir, str(idx),
                                           TEST_OUT_FILENAME),
                    WORKSPACE_SPEC: os.path.join(working_dir,
                                                 str(idx), SPEC_FILENAME),
                    WORKSPACE_NAME: '{}_{}'.format(KFOLD, str(idx))}
                   for idx in range(fold_num)]

    # Begin training: one process per fold, each writing its workspace spec
    # to the fold's spec file through stdout.
    train_jobs = []
    for fold_param in fold_params:
        spec_file = open(fold_param[WORKSPACE_SPEC], 'w')
        train_args = [sys.executable, TRAIN_CONVERSATION_PATH,
                      '-i', fold_param[FOLD_TRAIN],
                      '-n', fold_param[WORKSPACE_NAME],
                      '-u', username, '-p', password,
                      '-a', iam_apikey,
                      '-l', url, '-v', version,
                      '-w', workspace_base_file]
        train_jobs.append(
            (subprocess.Popen(train_args, stdout=spec_file), spec_file))

    train_failure_idx = []
    for idx, (process, spec_file) in enumerate(train_jobs):
        try:
            if process.wait() != 0:
                train_failure_idx.append(idx)
        finally:
            # Close the spec file whether or not training succeeded.
            spec_file.close()

    try:
        if train_failure_idx:
            # Join the individual indices; joining str(list) would split
            # the list's repr into single characters.
            raise RuntimeError(
                'Fail to train {} fold workspace'.format(','.join(
                    str(idx) for idx in train_failure_idx)))

        print('Trained {} workspaces'.format(str(fold_num)))

        # Begin testing
        test_processes = []
        FOLD_TEST_RATE = int(MAX_TEST_RATE / fold_num)
        for fold_param in fold_params:
            with open(fold_param[WORKSPACE_SPEC]) as f:
                workspace_id = json.load(f)[WORKSPACE_ID_TAG]
            test_args = [sys.executable, TEST_CONVERSATION_PATH,
                         '-i', fold_param[FOLD_TEST],
                         '-o', fold_param[TEST_OUT],
                         '-u', username, '-p', password,
                         '-a', iam_apikey, '-l', url, '-v', version,
                         '-t', UTTERANCE_COLUMN, '-g', GOLDEN_INTENT_COLUMN,
                         '-w', workspace_id, '-r', str(FOLD_TEST_RATE),
                         '-m']
            if partial_credit_table is not None:
                test_args += ['--partial_credit_table', partial_credit_table]
            test_processes.append(subprocess.Popen(test_args))

        test_failure_idx_str = []
        for idx, process in enumerate(test_processes):
            if process.wait() != 0:
                test_failure_idx_str.append(str(idx))

        if test_failure_idx_str:
            raise RuntimeError('Fail to test {} fold workspace'.format(
                ','.join(test_failure_idx_str)))

        print('Tested {} workspaces'.format(str(fold_num)))

        test_out_files = [fold_param[TEST_OUT] for fold_param in fold_params]

        # Add a column for the fold number
        for idx, this_file in enumerate(test_out_files):
            this_df = pd.read_csv(this_file, quoting=csv.QUOTE_ALL,
                                  encoding='utf-8', keep_default_na=False)
            this_df['Fold Index'] = idx
            this_df.to_csv(this_file, encoding='utf-8',
                           quoting=csv.QUOTE_ALL, index=False)

        # Union test out
        kfold_result_file = os.path.join(out_dir, KFOLD_UNION_FILE)
        pd.concat([pd.read_csv(file, quoting=csv.QUOTE_ALL, encoding=UTF_8,
                               keep_default_na=False)
                   for file in test_out_files]) \
          .to_csv(kfold_result_file,
                  encoding='utf-8', quoting=csv.QUOTE_ALL, index=False)
        print("Wrote k-fold result file to {}".format(kfold_result_file))

        classfier_names = ['Fold {}'.format(idx) for idx in range(fold_num)]

        plot_args = [sys.executable, CREATE_PRECISION_CURVE_PATH,
                     '-t', '{} Fold Test'.format(str(fold_num)),
                     '-o', figure_path, '-w', weight_mode,
                     '--tau', conf_thres, '-n'] + \
            classfier_names + ['-i'] + test_out_files

        if subprocess.run(plot_args).returncode != 0:
            raise RuntimeError('Failure in plotting curves')

        # Drop the last four characters of the union file name.
        # NOTE(review): presumably a '.csv' suffix - confirm against
        # KFOLD_UNION_FILE.
        kfold_result_file_base = kfold_result_file[:-4]
        metrics_args = [sys.executable, INTENT_METRICS_PATH,
                        '-i', kfold_result_file,
                        '-o', kfold_result_file_base+".metrics.csv",
                        '--partial_credit_on',
                        str(partial_credit_table is not None)]
        if subprocess.run(metrics_args).returncode != 0:
            raise RuntimeError('Failure in generating intent metrics')

        confusion_args = [sys.executable, CONFUSION_MATRIX_PATH,
                          '-i', kfold_result_file,
                          '-o', kfold_result_file_base+".confusion_args.csv"]
        if subprocess.run(confusion_args).returncode != 0:
            raise RuntimeError('Failure in generating confusion matrix')

    finally:
        if not keep_workspace:
            # Only folds whose training succeeded have a usable spec file.
            workspace_ids = []
            for idx in range(fold_num):
                if idx not in train_failure_idx:
                    with open(fold_params[idx][WORKSPACE_SPEC]) as f:
                        workspace_id = json.load(f)[WORKSPACE_ID_TAG]
                        workspace_ids.append(workspace_id)

            delete_workspaces(username, password, iam_apikey, url, version, workspace_ids)
Ejemplo n.º 7
0
def blind(out_dir, intent_train_file, workspace_base_file, figure_path,
          test_out_path, test_input_file, previous_blind_out, keep_workspace,
          username, password, iam_apikey, url, version, weight_mode, conf_thres, partial_credit_table, figure_title):
    """Train a workspace, run the blind test against it and build reports.

    The run settings are echoed to stdout. If ``previous_blind_out`` is
    given, it must contain the intent-judge column and is plotted next to
    the new results as 'Old Classifier'. A workspace is trained (its spec
    JSON is stored under ``<out_dir>/<BLIND_TEST>``), tested against
    ``test_input_file``, and then the precision-curve, intent-metrics and
    confusion-matrix scripts are run on the output. Unless
    ``keep_workspace`` is truthy the workspace is deleted afterwards,
    whether or not the later steps succeeded.

    Raises:
        ValueError: ``previous_blind_out`` lacks the intent-judge column.
        RuntimeError: any helper subprocess exits non-zero.
    """
    show = '{}={}'.format
    print('Begin {} with following details:'.format(BLIND_TEST.upper()))
    print(show(INTENT_FILE_ITEM, intent_train_file))
    print(show(WORKSPACE_BASE_ITEM, workspace_base_file))
    print(show(TEST_FILE_ITEM, test_input_file))
    print(show(PREVIOUS_BLIND_OUT_ITEM, previous_blind_out))
    print(show(FIGURE_PATH_ITEM, figure_path))
    print(show(TEST_OUT_PATH_ITEM, test_out_path))
    print(show(OUT_DIR_ITEM, out_dir))
    print(show(DO_KEEP_WORKSPACE_ITEM, BOOL_MAP[keep_workspace]))
    print(show(WEIGHT_MODE_ITEM, weight_mode))
    print(show(CONF_THRES_ITEM, conf_thres))
    print(show(WCS_USERNAME_ITEM, username))
    print(show(WCS_BASEURL_ITEM, url))
    print(show(WA_API_VERSION_ITEM, version))
    print(show(PARTIAL_CREDIT_TABLE_ITEM, partial_credit_table))

    # Validate previous blind out format
    test_out_files = [test_out_path]
    classfier_names = ['New Classifier']

    if previous_blind_out is not None:
        previous_frame = pd.read_csv(previous_blind_out,
                                     quoting=csv.QUOTE_ALL, encoding=UTF_8)
        if INTENT_JUDGE_COLUMN not in list(previous_frame.columns.values):
            raise ValueError(
                "'{}' column doesn't exist in {}.".format(
                    INTENT_JUDGE_COLUMN, previous_blind_out))

        test_out_files.append(previous_blind_out)
        classfier_names.append('Old Classifier')

    working_dir = os.path.join(out_dir, BLIND_TEST)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    workspace_spec_json = os.path.join(working_dir, SPEC_FILENAME)
    train_args = [
        sys.executable, TRAIN_CONVERSATION_PATH,
        '-i', intent_train_file, '-n', 'blind test',
        '-u', username, '-p', password,
        '-a', iam_apikey,
        '-l', url, '-v', version,
        '-w', workspace_base_file,
    ]
    with open(workspace_spec_json, 'w') as spec_out:
        train_status = subprocess.run(train_args, stdout=spec_out).returncode
        if train_status != 0:
            raise RuntimeError('Failure in training workspace')
        print('Trained blind workspace')

    with open(workspace_spec_json, 'r') as spec_in:
        workspace_id = json.load(spec_in)[WORKSPACE_ID_TAG]

    try:
        test_args = [
            sys.executable, TEST_CONVERSATION_PATH,
            '-i', test_input_file,
            '-o', test_out_path, '-m',
            '-u', username, '-p', password, '-a', iam_apikey, '-l', url,
            '-t', UTTERANCE_COLUMN, '-g', GOLDEN_INTENT_COLUMN,
            '-w', workspace_id, '-v', version,
            '-r', str(MAX_TEST_RATE),
        ]
        if partial_credit_table is not None:
            test_args.extend(['--partial_credit_table', partial_credit_table])
        if subprocess.run(test_args).returncode != 0:
            raise RuntimeError('Failure in testing blind data')
        print('Tested blind workspace')

        plot_args = [
            sys.executable, CREATE_PRECISION_CURVE_PATH,
            '-t', figure_title, '-w', weight_mode, '--tau',
            conf_thres, '-o', figure_path,
            '-n', *classfier_names,
            '-i', *test_out_files,
        ]
        if subprocess.run(plot_args).returncode != 0:
            raise RuntimeError('Failure in plotting curves')

        # Report file names are derived from the test output path with its
        # last four characters removed.
        report_base = test_out_path[:-4]
        partial_credit_flag = str(partial_credit_table is not None)

        metrics_args = [
            sys.executable, INTENT_METRICS_PATH,
            '-i', test_out_path,
            '-o', report_base + "_metrics.csv",
            '--partial_credit_on', partial_credit_flag,
        ]
        if subprocess.run(metrics_args).returncode != 0:
            raise RuntimeError('Failure in generating intent metrics')

        confusion_args = [
            sys.executable, CONFUSION_MATRIX_PATH,
            '-i', test_out_path,
            '-o', report_base + "_confusion.csv",
        ]
        if subprocess.run(confusion_args).returncode != 0:
            raise RuntimeError('Failure in generating confusion matrix')
    finally:
        if not keep_workspace:
            delete_workspaces(username, password, iam_apikey, url, version, [workspace_id])
Ejemplo n.º 8
0
    def test_kfold(self):
        """ User passes correct kfold config, should pass

        Trains a workspace from the sample intents and entities using the
        configured API key and base URL, writes a k-fold config file under
        the test directory, runs the script under test with that config,
        and asserts that it exits with status 0. The training workspace is
        deleted afterwards, even if a later step raises.
        """
        print('=============================================')
        print('test_kfold - Executes KFOLD tests with run.py')
        print('=============================================')

        raised = False

        kfold_test_dir = os.path.join(self.test_dir, KFOLD)
        if not os.path.exists(kfold_test_dir):
            os.makedirs(kfold_test_dir)

        intent_path = os.path.join(tool_base, 'resources', 'sample',
                                   'intents.csv')
        entity_path = os.path.join(tool_base, 'resources', 'sample',
                                   'entities.csv')

        apikey = self.config[WCS_CREDS_SECTION][WCS_IAM_APIKEY_ITEM]

        args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i', intent_path, '-e',
            entity_path, '-a', apikey, '-n', 'KFOLD_TEST_RUN', '-l',
            self.config[WCS_CREDS_SECTION][WCS_BASEURL_ITEM]
        ]

        workspace_spec_json = os.path.join(self.test_dir, SPEC_FILENAME)
        # Train a new instance in order to pull the workspace detail
        print('Begin training for setting up environment')
        with open(workspace_spec_json, 'w+') as f:
            if subprocess.run(args, stdout=f).returncode != 0:
                print('Training failed')
                # The child wrote through this same handle, so the file
                # offset sits after its output; rewind before reading.
                f.seek(0)
                print(f.read())
                raise Exception('Training failed')

        with open(workspace_spec_json, 'r') as f:
            self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM] = json.load(
                f)[WORKSPACE_ID_TAG]

        try:
            self.config[DEFAULT_SECTION][MODE_ITEM] = KFOLD
            self.config[DEFAULT_SECTION][TEMP_DIR_ITEM] = kfold_test_dir
            self.config[DEFAULT_SECTION][FIGURE_PATH_ITEM] = \
                os.path.join(kfold_test_dir, 'figure.png')
            self.config[DEFAULT_SECTION][FOLD_NUM_ITEM] = \
                str(FOLD_NUM_DEFAULT)
            self.config[DEFAULT_SECTION][PARTIAL_CREDIT_TABLE_ITEM] = \
                os.path.join(tool_base, 'resources', 'sample',
                             'partial-credit-table.csv')

            kfold_config_path = os.path.join(kfold_test_dir, 'config.ini')

            with open(kfold_config_path, 'w') as configfile:
                self.config.write(configfile)

            args = [sys.executable, self.script_path, '-c', kfold_config_path]

            if subprocess.run(args).returncode != 0:
                raised = True
        finally:
            # Always remove the workspace created above so a failure in the
            # steps above does not leak it.
            workspace_ids = [self.config[DEFAULT_SECTION][WORKSPACE_ID_ITEM]]
            delete_workspaces(apikey,
                              self.config[WCS_CREDS_SECTION][WCS_BASEURL_ITEM],
                              '2019-02-28', workspace_ids)

        self.assertFalse(raised, 'Exception raised')
Ejemplo n.º 9
0
def kfold(fold_num, temp_dir, intent_train_file, workspace_base_file,
          figure_path, keep_workspace, username, password, weight_mode,
          conf_thres):
    """Run a k-fold test and plot its precision curves.

    Steps, each delegated to a helper script run as a subprocess:

    1. Split ``intent_train_file`` into ``fold_num`` folds under
       ``<temp_dir>/<KFOLD>``.
    2. Train one workspace per fold in parallel; each training process
       writes its workspace spec JSON to the fold's spec file.
    3. Test every fold in parallel against its workspace.
    4. Write the union of all fold outputs to ``KFOLD_UNION_FILE`` in the
       working directory and plot one precision curve per fold.

    Unless ``keep_workspace`` is truthy, the workspaces of all folds whose
    training succeeded are deleted at the end, even on failure.

    Raises:
        RuntimeError: any of the helper subprocesses exits non-zero.
    """
    FOLD_TRAIN = 'fold_train'
    FOLD_TEST = 'fold_test'
    WORKSPACE_SPEC = 'fold_workspace'
    WORKSPACE_NAME = 'workspace_name'
    TEST_OUT = 'test_out'

    print('Begin {} with following details:'.format(KFOLD.upper()))
    print('{}={}'.format(INTENT_FILE_ITEM, intent_train_file))
    print('{}={}'.format(WORKSPACE_BASE_ITEM, workspace_base_file))
    print('{}={}'.format(FIGURE_PATH_ITEM, figure_path))
    print('{}={}'.format(TEMP_DIR_ITEM, temp_dir))
    print('{}={}'.format(FOLD_NUM_ITEM, fold_num))
    print('{}={}'.format(DO_KEEP_WORKSPACE_ITEM, BOOL_MAP[keep_workspace]))
    print('{}={}'.format(WEIGHT_MODE_ITEM, weight_mode))
    print('{}={}'.format(CONF_THRES_ITEM, conf_thres))
    print('{}={}'.format(WCS_USERNAME_ITEM, username))

    working_dir = os.path.join(temp_dir, KFOLD)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    # Prepare folds
    fold_args = [
        sys.executable, CREATE_TEST_TRAIN_FOLDS_PATH, '-i',
        intent_train_file, '-o', working_dir, '-k',
        str(fold_num)
    ]
    if subprocess.run(fold_args, stdout=subprocess.PIPE).returncode == 0:
        print('Created {} folds'.format(str(fold_num)))
    else:
        raise RuntimeError('Failure in folds creation')

    # Construct fold params
    fold_params = [{
        FOLD_TRAIN:
        os.path.join(working_dir, str(idx), TRAIN_FILENAME),
        FOLD_TEST:
        os.path.join(working_dir, str(idx), TEST_FILENAME),
        TEST_OUT:
        os.path.join(working_dir, str(idx), TEST_OUT_FILENAME),
        WORKSPACE_SPEC:
        os.path.join(working_dir, str(idx), SPEC_FILENAME),
        WORKSPACE_NAME:
        '{}_{}'.format(KFOLD, str(idx))
    } for idx in range(fold_num)]

    # Begin training: one process per fold, each writing its workspace spec
    # to the fold's spec file through stdout.
    train_jobs = []
    for fold_param in fold_params:
        spec_file = open(fold_param[WORKSPACE_SPEC], 'w')
        train_args = [
            sys.executable, TRAIN_CONVERSATION_PATH, '-i',
            fold_param[FOLD_TRAIN], '-n', fold_param[WORKSPACE_NAME], '-u',
            username, '-p', password, '-w', workspace_base_file
        ]
        train_jobs.append(
            (subprocess.Popen(train_args, stdout=spec_file), spec_file))

    train_failure_idx = []
    for idx, (process, spec_file) in enumerate(train_jobs):
        try:
            if process.wait() != 0:
                train_failure_idx.append(idx)
        finally:
            # Close the spec file whether or not training succeeded.
            spec_file.close()

    try:
        if train_failure_idx:
            # Join the individual indices; joining str(list) would split
            # the list's repr into single characters.
            raise RuntimeError('Fail to train {} fold workspace'.format(
                ','.join(str(idx) for idx in train_failure_idx)))

        print('Trained {} workspaces'.format(str(fold_num)))

        # Begin testing
        test_processes = []
        FOLD_TEST_RATE = int(MAX_TEST_RATE / fold_num)
        for fold_param in fold_params:
            with open(fold_param[WORKSPACE_SPEC]) as f:
                workspace_id = json.load(f)[WORKSPACE_ID_TAG]
            test_args = [
                sys.executable, TEST_CONVERSATION_PATH, '-i',
                fold_param[FOLD_TEST], '-o', fold_param[TEST_OUT], '-u',
                username, '-p', password, '-t', UTTERANCE_COLUMN, '-g',
                GOLDEN_INTENT_COLUMN, '-w', workspace_id, '-r',
                str(FOLD_TEST_RATE), '-m'
            ]
            test_processes.append(subprocess.Popen(test_args))

        test_failure_idx_str = []
        for idx, process in enumerate(test_processes):
            if process.wait() != 0:
                test_failure_idx_str.append(str(idx))

        if test_failure_idx_str:
            raise RuntimeError('Fail to test {} fold workspace'.format(
                ','.join(test_failure_idx_str)))

        print('Tested {} workspaces'.format(str(fold_num)))

        test_out_files = [fold_param[TEST_OUT] for fold_param in fold_params]

        # Union test out
        pd.concat([pd.read_csv(file, quoting=csv.QUOTE_ALL, encoding=UTF_8,
                               keep_default_na=False)
                   for file in test_out_files]) \
          .to_csv(os.path.join(working_dir, KFOLD_UNION_FILE),
                  encoding='utf-8', quoting=csv.QUOTE_ALL, index=False)

        classfier_names = ['Fold {}'.format(idx) for idx in range(fold_num)]

        plot_args = [sys.executable, CREATE_PRECISION_CURVE_PATH,
                     '-t', '{} Fold Test'.format(str(fold_num)),
                     '-o', figure_path, '-w', weight_mode,
                     '--tau', conf_thres, '-n'] + \
            classfier_names + ['-i'] + test_out_files

        if subprocess.run(plot_args).returncode == 0:
            print('Generated precision curves for {} folds'.format(
                str(fold_num)))
        else:
            raise RuntimeError('Failure in plotting curves')
    finally:
        if not keep_workspace:
            # Only folds whose training succeeded have a usable spec file.
            workspace_ids = []
            for idx in range(fold_num):
                if idx not in train_failure_idx:
                    with open(fold_params[idx][WORKSPACE_SPEC]) as f:
                        workspace_id = json.load(f)[WORKSPACE_ID_TAG]
                        workspace_ids.append(workspace_id)

            delete_workspaces(username, password, workspace_ids)
Ejemplo n.º 10
0
def blind(temp_dir, intent_train_file, workspace_base_file, figure_path,
          test_out_path, test_input_file, previous_blind_out, keep_workspace,
          username, password, weight_mode, conf_thres):
    """Train a workspace, run the blind test against it and plot curves.

    The run settings are echoed to stdout. If ``previous_blind_out`` is
    given, it must contain the intent-judge column and is plotted next to
    the new results as 'Old Classifier'. A workspace is trained (its spec
    JSON is stored under ``<temp_dir>/<BLIND_TEST>``), tested against
    ``test_input_file``, and the precision-curve script is run on the
    output. Unless ``keep_workspace`` is truthy the workspace is deleted
    afterwards, whether or not the later steps succeeded.

    Raises:
        ValueError: ``previous_blind_out`` lacks the intent-judge column.
        RuntimeError: any helper subprocess exits non-zero.
    """
    show = '{}={}'.format
    print('Begin {} with following details:'.format(BLIND_TEST.upper()))
    print(show(INTENT_FILE_ITEM, intent_train_file))
    print(show(WORKSPACE_BASE_ITEM, workspace_base_file))
    print(show(TEST_FILE_ITEM, test_input_file))
    print(show(PREVIOUS_BLIND_OUT_ITEM, previous_blind_out))
    print(show(FIGURE_PATH_ITEM, figure_path))
    print(show(TEST_OUT_PATH_ITEM, test_out_path))
    print(show(TEMP_DIR_ITEM, temp_dir))
    print(show(DO_KEEP_WORKSPACE_ITEM, BOOL_MAP[keep_workspace]))
    print(show(WEIGHT_MODE_ITEM, weight_mode))
    print(show(CONF_THRES_ITEM, conf_thres))
    print(show(WCS_USERNAME_ITEM, username))

    # Validate previous blind out format
    test_out_files = [test_out_path]
    classfier_names = ['New Classifier']

    if previous_blind_out is not None:
        previous_frame = pd.read_csv(previous_blind_out,
                                     quoting=csv.QUOTE_ALL,
                                     encoding=UTF_8)
        if INTENT_JUDGE_COLUMN not in list(previous_frame.columns.values):
            raise ValueError("'{}' column doesn't exist in {}.".format(
                INTENT_JUDGE_COLUMN, previous_blind_out))

        test_out_files.append(previous_blind_out)
        classfier_names.append('Old Classifier')

    working_dir = os.path.join(temp_dir, BLIND_TEST)
    if not os.path.exists(working_dir):
        os.makedirs(working_dir)

    workspace_spec_json = os.path.join(working_dir, SPEC_FILENAME)
    train_args = [
        sys.executable, TRAIN_CONVERSATION_PATH,
        '-i', intent_train_file,
        '-n', 'blind test',
        '-u', username, '-p', password,
        '-w', workspace_base_file,
    ]
    with open(workspace_spec_json, 'w') as spec_out:
        train_status = subprocess.run(train_args, stdout=spec_out).returncode
        if train_status != 0:
            raise RuntimeError('Failure in training workspace')
        print('Trained blind workspace')

    with open(workspace_spec_json, 'r') as spec_in:
        workspace_id = json.load(spec_in)[WORKSPACE_ID_TAG]

    try:
        test_args = [
            sys.executable, TEST_CONVERSATION_PATH,
            '-i', test_input_file,
            '-o', test_out_path, '-m',
            '-u', username, '-p', password,
            '-t', UTTERANCE_COLUMN, '-g', GOLDEN_INTENT_COLUMN,
            '-w', workspace_id,
            '-r', str(MAX_TEST_RATE),
        ]
        if subprocess.run(test_args).returncode != 0:
            raise RuntimeError('Failure in testing blind data')
        print('Tested blind workspace')

        plot_args = [
            sys.executable, CREATE_PRECISION_CURVE_PATH,
            '-t', 'Golden Test Set',
            '-w', weight_mode, '--tau', conf_thres,
            '-o', figure_path,
            '-n', *classfier_names,
            '-i', *test_out_files,
        ]
        if subprocess.run(plot_args).returncode != 0:
            raise RuntimeError('Failure in plotting curves')
        print('Generated precision curves for blind set')
    finally:
        if not keep_workspace:
            delete_workspaces(username, password, [workspace_id])