예제 #1
0
def trainTasks(train_file, tasks, fold_num, output_folder=None, param_file="params.xml"):
    """
    Train and save one model per task from a single training instance file.

    The labels read from ``train_file`` are remapped for every task via
    ``int(task[label])``; the instances themselves are reused unchanged.

    Arguments:
    train_file    -- path of the training instance file, read with read_problem().
    tasks         -- list of label mappings, indexable by an original label.
    fold_num      -- fold index; the models go into a "fold-NN" sub-folder
                     numbered ``fold_num + 1``.
    output_folder -- parent folder for the fold sub-folder ("models" when falsy).
    param_file    -- file handed to readParams(); it must yield exactly one
                     parameter set per task.

    Returns the list of written model file paths, in task order.

    """
    fold_dir = os.path.join(output_folder or "models", "fold-{0:02d}".format(fold_num + 1))
    if not os.path.exists(fold_dir):
        os.makedirs(fold_dir)

    parameters = readParams(param_file)
    assert len(parameters) == len(tasks), (
        "The parameters file contains parameters for {0} tasks, whereas there are {1}.".format(
            len(parameters), len(tasks)
        )
    )

    labels, instances = read_problem(train_file)
    train_name = os.path.basename(train_file)
    task_count = len(tasks)

    model_files = []
    for index, task in enumerate(tasks):
        task_number = index + 1
        print("---training task {0:03d}/{1:03d}".format(task_number, task_count))

        # Re-label every training instance according to the current task.
        task_labels = [int(task[label]) for label in labels]

        # Turn the task's parameter set into a command-line style option string.
        task_params = parameters[index]
        options = "".join(" {0} {1}".format(name, value) for name, value in task_params.items())
        # `classifier` is a module-level setting that is only read here.
        if classifier == "libsvm" and "-b" not in task_params:
            # Add "-b 1" unless the parameter set already specifies -b.
            options += " -b 1"
        options += " -q"

        model_path = os.path.join(fold_dir, train_name + ".task{0:03d}.model".format(task_number))
        save_model(model_path, train(task_labels, instances, options))
        model_files.append(model_path)

    return model_files
예제 #2
0
def getPredictions(test_file, tasks, model_files, fold_num, debug=False):
    """
    Classify every test instance against each task's model.

    The labels read from ``test_file`` are remapped per task via
    ``int(task[label])`` and each instance is predicted with the model at the
    same position in ``model_files``.

    Arguments:
    test_file   -- path of the test instance file, read with read_problem().
    tasks       -- list of label mappings, indexable by an original label.
    model_files -- one model file path per task, in the same order as tasks.
    fold_num    -- fold index; only used (as ``fold_num + 1``) to name the
                   debug files.
    debug       -- when true, write the re-labelled instances of every task to
                   "<test file basename>.foldNN.taskNNN.test" in the current
                   working directory.

    Returns a tuple ``(predictions, task_accuracies)``:
    predictions     -- one list per instance holding one value per task. With
                       "libsvm" this is the value predict() reports for label
                       +1 when called with "-b 1"; with "liblinear" it is the
                       decision value (sign-flipped when the label order is
                       [-1, 1]) passed through normalizePrediction().
    task_accuracies -- per task, the fraction of instances whose predicted
                       label equals the re-mapped label (0.0 for every task
                       when the test file contains no instances).

    Raises AssertionError when the number of model files differs from the
    number of tasks, and ValueError when the module-level ``classifier`` is
    neither "liblinear" nor "libsvm".

    """
    assert len(tasks) == len(model_files), "Not as many model files as tasks"

    labels, instances = read_problem(test_file)

    print("---Loading models...")
    models = [load_model(model_file) for model_file in model_files]

    correct_counts = [0.0] * len(tasks)

    print("---Classifying instances...")
    predictions = []

    debug_files = []
    try:
        if debug:
            # Open the files one by one so that those already opened are still
            # closed by the `finally` clause if a later open() fails.
            test_name = os.path.basename(test_file)
            for i in range(len(tasks)):
                debug_name = test_name + ".fold{0:02d}.task{1:03d}.test".format(fold_num + 1, i + 1)
                debug_files.append(open(debug_name, "w"))

        for label, instance in zip(labels, instances):
            if debug:
                # The feature string is the same for every task: build it once.
                features = " ".join(
                    ":".join([str(idx), str(val)]) for idx, val in sorted(instance.items())
                )

            instance_predictions = []
            for i, (model, task) in enumerate(zip(models, tasks)):
                new_label = int(task[label])

                if debug:
                    debug_files[i].write("{0} {1}\n".format(new_label, features))

                # `classifier` is a module-level setting that is only read here.
                if classifier == "liblinear":
                    pred_labels, ACC, pred_values, label_order = predict([new_label], [instance], model)
                    assert len(pred_values[0]) == 1
                    pred_value = pred_values[0][0]
                    # If the label order is reversed, reverse the sign of the distance value.
                    if label_order == [-1, 1]:
                        pred_value = -pred_value
                    # The value is a distance, not a probability: normalize it.
                    pred_value = normalizePrediction(pred_value)
                elif classifier == "libsvm":
                    pred_labels, (ACC, MSC, SCC), pred_values = predict(
                        [new_label], [instance], model, options="-b 1"
                    )
                    # Pick the value that belongs to the +1 label.
                    label_order = model.get_labels()
                    pred_value = pred_values[0][label_order.index(1)]
                else:
                    # Fail with a clear message instead of an UnboundLocalError below.
                    raise ValueError("Unsupported classifier: {0!r}".format(classifier))

                # Add the value to the instance predictions.
                instance_predictions.append(pred_value)

                # Add one if the prediction was accurate.
                if new_label == pred_labels[0]:
                    correct_counts[i] += 1

            predictions.append(instance_predictions)
    finally:
        # Close the debug files even when classification fails half-way.
        for debug_file in debug_files:
            debug_file.close()

    print("---Done.")

    # The counts are floats, so this is a true division on Python 2 as well.
    instance_count = len(instances)
    if instance_count:
        task_accuracies = [count / instance_count for count in correct_counts]
    else:
        # No instances: avoid a ZeroDivisionError and report 0.0 for each task.
        task_accuracies = correct_counts

    return predictions, task_accuracies