Example #1
def train_with_dummy(args, threshold=0.8):
    # `h` is assumed to be the project's helper module and `sm` to be
    # sklearn.metrics, both imported at module level.
    # Clean the training CSV (default "train.csv"); the helper presumably
    # dummy-encodes categoricals and returns the preprocessing parameters.
    train_params, train_df = h.clean_with_dummy(
        args[0] if len(args) > 0 else "train.csv")
    # Split the cleaned frame into train/test features and labels.
    X_train, Y_train, X_test, Y_test = h.prepare(train_df)
    # Fit a logistic-regression model; the helper returns the learned parameters,
    # the cost history, the test accuracy and the test-set predictions.
    model_params, costs, accuracy, Y_test_pred = h.train_logistic(
        X_train, Y_train, X_test, Y_test, threshold, False)
    train_params.update(model_params)
    metrics = {
        'accuracy': accuracy,
        'confusion_matrix': sm.confusion_matrix(Y_test, Y_test_pred),
        'classification_report': sm.classification_report(Y_test, Y_test_pred)
    }
    return train_params, metrics, costs
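A minimal usage sketch, assuming this function lives in a module where `h` is the project's helper module and `sm` is sklearn.metrics; the file name and threshold below are placeholder values:

# Hypothetical driver for the logistic-regression variant above.
params, metrics, costs = train_with_dummy(["train.csv"], threshold=0.8)
print(metrics['accuracy'])
print(metrics['classification_report'])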
Example #2
def train_with_dummy(args):
    # Same module context as above: `h` is the project's helper module and
    # `sm` is assumed to be sklearn.metrics.
    train_params, train_df = h.clean_with_dummy(
        args[0] if len(args) > 0 else "train.csv")
    X_train, Y_train, X_test, Y_test = h.prepare(train_df)
    # Fit a k-NN classifier; the helper also returns the test costs and the
    # chosen n (presumably the number of neighbours).
    classifier, test_costs, n = h.train_knn(X_train, Y_train, X_test, Y_test)
    Y_test_pred = classifier.predict(X_test)
    metrics = {
        'accuracy': {
            'train': classifier.score(X_train, Y_train),
            'test': classifier.score(X_test, Y_test)
        },
        'confusion_matrix': sm.confusion_matrix(Y_test, Y_test_pred),
        'classification_report': sm.classification_report(Y_test, Y_test_pred)
    }
    return train_params, metrics, classifier, test_costs, n
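Unlike the logistic-regression variant, this version also returns the fitted k-NN classifier, so callers can keep predicting with it. A minimal sketch under the same assumptions:

params, metrics, knn, test_costs, n = train_with_dummy(["train.csv"])
print(metrics['accuracy']['test'])
# `knn` can then be reused on new data prepared the same way as X_train.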
Example #3
# Assumed imports for this snippet; `HEDWIG` (the project's RDF namespace),
# `ALEPH_SETTINGS`, `prepare` and `user_defined` are defined elsewhere in the module.
import rdflib
from collections import defaultdict
from rdflib import Literal
from rdflib.namespace import RDF


def convert_to_aleph(input_dict):
    return_dict = {'background': ''}
    data = rdflib.Graph()
    prepare(data)  # project-specific graph setup
    print "parsing examples"
    data.parse(data=input_dict['examples'], format='n3')
    print "parsing bk"
    for ontology in input_dict['bk_file']:
        data.parse(data=ontology, format='n3')
    settings = input_dict['settings'] if 'settings' in input_dict else ALEPH_SETTINGS
    generalizations = defaultdict(list)
    annotations = defaultdict(list)
    print "going through generalization predicates"
    generelization_predicates = list(data.subjects(predicate=RDF.type, object=HEDWIG.GeneralizationPredicate))
    for predicate in generelization_predicates:
        for sub, obj in data.subject_objects(predicate=predicate):
            if user_defined(sub) and user_defined(obj):
                generalizations[sub].append(obj)
    print "going through examples"
    pos = ''
    neg = ''
    positive_class = Literal(input_dict['positive_class'])
    for example in data.subjects(predicate=RDF.type, object=HEDWIG.Example):
        positive = (example, HEDWIG.class_label, positive_class) in data

        if positive:
            pos += 'positive(\'%s\').\n' % example
        else:
            # In Aleph, the negative-examples file also lists positive/1 atoms.
            neg += 'positive(\'%s\').\n' % example
        for annotation_link in data.objects(subject=example, predicate=HEDWIG.annotated_with):
            example_annotations = data.objects(subject=annotation_link, predicate=HEDWIG.annotation)
            annotation = next(example_annotations)
            if next(example_annotations, None) is not None:
                raise Exception("Unable to parse data - annotations for example %s are unclear" % example)
            annotations[example].append(annotation)
    print "writing bk"
    bk = ':- modeh(1, positive(+instance)).\n'
    bk += ':- mode(*, annotated_with(+instance, #annotation)).\n'
    bk += ':- determination(positive/1, annotated_with/2).\n'
    bk += '\n\n'
    for setting in settings:
        bk += ':- set(%s, %s).\n' % (setting, settings[setting])
    bk += '\n\n'

    for sub_concept in generalizations:
        for super_concept in generalizations[sub_concept]:
            bk += 'annotated_with(X, \'%s\') :- annotated_with(X, \'%s\').\n' % (super_concept, sub_concept)
    bk += '\n'
    print "writing pos and neg"
    i = 0
    print len(annotations)
    for example in annotations:
        i += 1
        if i%1000 == 0:
            print i
        for concept in annotations[example]:
            bk += 'annotated_with(\'%s\', \'%s\').\n' % (example, concept)
    return_dict['bk'] = bk
    return_dict['pos'] = pos
    return_dict['neg'] = neg
    print "done!!!"
    return return_dict
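A minimal sketch of driving this converter and writing the result out as an Aleph input set (background .b, positive .f, negative .n files); the N3 strings and file stem below are placeholders:

# `examples_n3` and `ontology_n3` stand for N3-serialised RDF strings.
result = convert_to_aleph({
    'examples': examples_n3,
    'bk_file': [ontology_n3],
    'positive_class': 'target',
})
for ext, key in (('b', 'bk'), ('f', 'pos'), ('n', 'neg')):
    with open('dataset.%s' % ext, 'w') as f:
        f.write(result[key])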