# Assumed module-level imports (not shown in these snippets): a project helper module
# aliased as `h` (clean_with_dummy, prepare, train_logistic, train_knn) and, most
# likely, `sklearn.metrics` aliased as `sm`.
def train_with_dummy(args, threshold=0.8):
    # Clean the input CSV (default "train.csv") with dummy-encoded features and
    # split it into train/test matrices.
    train_params, train_df = h.clean_with_dummy(args[0] if len(args) > 0 else "train.csv")
    X_train, Y_train, X_test, Y_test = h.prepare(train_df)

    # Train a logistic-regression model and evaluate it at the given decision threshold.
    model_params, costs, accuracy, Y_test_pred = h.train_logistic(
        X_train, Y_train, X_test, Y_test, threshold, False)
    train_params.update(model_params)

    metrics = {
        'accuracy': accuracy,
        'confusion_matrix': sm.confusion_matrix(Y_test, Y_test_pred),
        'classification_report': sm.classification_report(Y_test, Y_test_pred)
    }
    return train_params, metrics, costs
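# Illustrative usage (hypothetical, not part of the original module): calls the
# logistic-regression variant above and prints its evaluation metrics. Assumes a
# local "train.csv" laid out as h.clean_with_dummy / h.prepare expect.
def _example_run_logistic():
    params, metrics, costs = train_with_dummy(["train.csv"], threshold=0.8)
    print(metrics['accuracy'])
    print(metrics['classification_report'])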
# KNN variant; it shares its name with the logistic-regression function above, so the
# two presumably live in separate modules.
def train_with_dummy(args):
    # Clean the input CSV (default "train.csv") with dummy-encoded features and
    # split it into train/test matrices.
    train_params, train_df = h.clean_with_dummy(args[0] if len(args) > 0 else "train.csv")
    X_train, Y_train, X_test, Y_test = h.prepare(train_df)

    # Fit a k-nearest-neighbours classifier via the helper, which also returns the
    # test costs and the chosen number of neighbours n.
    classifier, test_costs, n = h.train_knn(X_train, Y_train, X_test, Y_test)
    Y_test_pred = classifier.predict(X_test)

    metrics = {
        'accuracy': {
            'train': classifier.score(X_train, Y_train),
            'test': classifier.score(X_test, Y_test)
        },
        'confusion_matrix': sm.confusion_matrix(Y_test, Y_test_pred),
        'classification_report': sm.classification_report(Y_test, Y_test_pred)
    }
    return train_params, metrics, classifier, test_costs, n
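# Illustrative usage (hypothetical, not part of the original module): calls the
# KNN variant above and reports train/test accuracy plus the selected n.
def _example_run_knn():
    params, metrics, classifier, test_costs, n = train_with_dummy(["train.csv"])
    print(metrics['accuracy']['train'], metrics['accuracy']['test'])
    print("selected n:", n)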
# Assumed module-level imports and helpers (not shown in this snippet):
#   import rdflib
#   from rdflib import Literal, RDF
#   from collections import defaultdict
#   HEDWIG - rdflib Namespace for the Hedwig vocabulary
#   ALEPH_SETTINGS, prepare(), user_defined() - defined elsewhere in the module
def convert_to_aleph(input_dict):
    return_dict = {'background': ''}
    data = rdflib.Graph()
    prepare(data)

    print("parsing examples")
    data.parse(data=input_dict['examples'], format='n3')

    print("parsing bk")
    for ontology in input_dict['bk_file']:
        data.parse(data=ontology, format='n3')

    settings = input_dict['settings'] if 'settings' in input_dict else ALEPH_SETTINGS
    generalizations = defaultdict(list)
    annotations = defaultdict(list)

    # Collect is-a links between user-defined concepts from the generalization predicates.
    print("going through generalization predicates")
    generalization_predicates = list(data.subjects(predicate=RDF.type,
                                                   object=HEDWIG.GeneralizationPredicate))
    for predicate in generalization_predicates:
        for sub, obj in data.subject_objects(predicate=predicate):
            if user_defined(sub) and user_defined(obj):
                generalizations[sub].append(obj)

    # Split examples into positives and negatives. Aleph lists positive/1 facts in both
    # the positive and the negative examples file, so negatives use the same predicate.
    print("going through examples")
    pos = ''
    neg = ''
    positive_class = Literal(input_dict['positive_class'])
    for example in data.subjects(predicate=RDF.type, object=HEDWIG.Example):
        positive = (example, HEDWIG.class_label, positive_class) in data
        if positive:
            pos += 'positive(\'%s\').\n' % example
        else:
            neg += 'positive(\'%s\').\n' % example
        for annotation_link in data.objects(subject=example, predicate=HEDWIG.annotated_with):
            example_annotations = data.objects(subject=annotation_link,
                                               predicate=HEDWIG.annotation)
            annotation = next(example_annotations)
            if next(example_annotations, None) is not None:
                raise Exception("Unable to parse data - annotations for example %s are unclear"
                                % example)
            annotations[example].append(annotation)

    # Build the background knowledge: mode/determination declarations, Aleph settings,
    # the generalization hierarchy, and the annotated_with/2 facts.
    print("writing bk")
    bk = ':- modeh(1, positive(+instance)).\n'
    bk += ':- mode(*, annotated_with(+instance, #annotation)).\n'
    bk += ':- determination(positive/1, annotated_with/2).\n'
    bk += '\n\n'
    for setting in settings:
        bk += ':- set(%s, %s).\n' % (setting, settings[setting])
    bk += '\n\n'
    for sub_concept in generalizations:
        for super_concept in generalizations[sub_concept]:
            bk += 'annotated_with(X, \'%s\') :- annotated_with(X, \'%s\').\n' % (super_concept,
                                                                                 sub_concept)
    bk += '\n'

    print("writing pos and neg")
    i = 0
    print(len(annotations))
    for example in annotations:
        i += 1
        if i % 1000 == 0:
            print(i)
        for concept in annotations[example]:
            bk += 'annotated_with(\'%s\', \'%s\').\n' % (example, concept)

    return_dict['bk'] = bk
    return_dict['pos'] = pos
    return_dict['neg'] = neg
    print("done!!!")
    return return_dict
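# Illustrative usage (hypothetical, not part of the original widget code): builds the
# input dictionary from n3 strings and writes the returned background/positive/negative
# strings to the three files Aleph conventionally reads (.b, .f, .n). The file stem
# "experiment" and the function name are assumptions for this example.
def _example_convert(examples_n3, ontology_n3, positive_class):
    result = convert_to_aleph({
        'examples': examples_n3,           # n3-serialized examples
        'bk_file': [ontology_n3],          # list of n3-serialized ontologies
        'positive_class': positive_class,  # label of the positive class
        # 'settings': {...}                # optional; falls back to ALEPH_SETTINGS
    })
    for ext, key in (('b', 'bk'), ('f', 'pos'), ('n', 'neg')):
        with open('experiment.%s' % ext, 'w') as f:
            f.write(result[key])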