Example #1
def main_with_explore():
    classes_properties_uris = easysparql.get_all_classes_properties_numerical(RAW_ENDPOINT)
    data, meta_data = data_extraction.data_and_meta_from_class_property_uris(class_property_uris=classes_properties_uris)
    if np.any(np.isnan(data)):
        print "there is a nan in the data"
        print "**************************"
    else:
        print "no nans in the data"
    data_extraction.save_data_and_meta_to_files(data=data, meta_data=meta_data)
    model = learning.train_with_data_and_meta(data=data, meta_data=meta_data)
    meta_with_clusters = learning.get_cluster_for_meta(training_meta=meta_data, testing_meta=meta_data)
    #print "model num_of_clusters: %d" % model.n_clusters
    #print "cluster centers: %s" % str(model.cluster_centers_)
    learning.test_with_data_and_meta(model=model, data=data, meta_data=meta_with_clusters)
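These examples rely on a few module-level imports and a RAW_ENDPOINT constant that are not shown on this page. Below is a minimal setup sketch, assuming the project-local modules easysparql, data_extraction, data_manipulation, and learning are importable and that RAW_ENDPOINT points at a SPARQL endpoint (the exact URL is only an illustration); the Django-side helpers used further down (MLModel, update_model_state, update_model_progress_for_partial) are project-specific and omitted here.

# Assumed setup for the snippets on this page (not part of the original examples).
import traceback
from functools import partial  # used by the explore_and_train_* examples below

import numpy as np

# Project-local modules referenced by the examples; their import paths are assumed.
import easysparql
import data_extraction
import data_manipulation
import learning

# Assumption: RAW_ENDPOINT is a SPARQL endpoint URL, e.g. the public DBpedia one.
RAW_ENDPOINT = "https://dbpedia.org/sparql"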
Example #2
def main_with_class_explore():
    class_uri = 'http://dbpedia.org/ontology/Person'
    properties = easysparql.get_numerical_properties_for_class_tbox(endpoint=RAW_ENDPOINT, class_uri=class_uri)
    if properties is None:
        return
    # pair the class URI with each of its numerical properties
    class_property_combinations = list(zip([class_uri] * len(properties), properties))
    # print class_property_combinations
    data, meta_data = data_extraction.data_and_meta_from_class_property_uris(
        class_property_uris=class_property_combinations)
    # data_extraction.save_data_and_meta_to_files(data=data, meta_data=meta_data)
    model = learning.train_with_data_and_meta(data=data, meta_data=meta_data)
    meta_with_clusters = learning.get_cluster_for_meta(training_meta=meta_data, testing_meta=meta_data)
    learning.test_with_data_and_meta(model=model, data=data, meta_data=meta_with_clusters)
    # now testing some files
    test_data, test_meta_data = data_extraction.data_and_meta_from_files(
        ['person_waist.csv', 'person_hipsize.csv', 'person_bustsize.csv'])
    learning.predict(model, data=test_data, meta_data=test_meta_data)
Example #3
def main():
    class_property_combinations = [
        ('http://xmlns.com/foaf/0.1/Person', 'http://dbpedia.org/ontology/numberOfMatches'),
        # ('http://schema.org/Place', 'http://dbpedia.org/property/longew'),
        # ('http://schema.org/Place', 'http://dbpedia.org/property/latns'),
        ('http://schema.org/Place', 'http://www.georss.org/georss/point'),
        # ('http://schema.org/Place', 'http://dbpedia.org/property/latm'),
        # ('http://schema.org/Place', 'http://dbpedia.org/property/longm'),
        ('http://schema.org/Place', 'http://dbpedia.org/property/latd'),
        ('http://schema.org/Place', 'http://dbpedia.org/property/longd'),
    ]
    class_property_combinations_test = [
        # ('http://schema.org/Place', 'http://dbpedia.org/property/latm'),
        # ('http://schema.org/Place', 'http://dbpedia.org/property/longm'),
        ('http://schema.org/Place', 'http://dbpedia.org/property/latd'),
        ('http://schema.org/Place', 'http://dbpedia.org/property/longd'),
    ]

    data1, meta_data1 = data_extraction.data_and_meta_from_class_property_uris(class_property_combinations)
    data2, meta_data2 = data_extraction.data_and_meta_from_files(['novHighC.csv'])
    data, meta_data = data_manipulation.merge_data_and_meta_naive(data1=data1, meta_data1=meta_data1, data2=data2,
                                                                  meta_data2=meta_data2)
    for clus, md in enumerate(meta_data):
        print "cluster %d => type: %s" % (clus, md["type"])
    model = learning.train_with_data_and_meta(data=data, meta_data=meta_data)

    test_data1, test_meta_data1 = data_extraction.data_and_meta_from_class_property_uris(
        class_property_combinations_test)

    test_data2, test_meta_data2 = data_extraction.data_and_meta_from_files(['mayHighC.csv'])
    # merge the two data sets
    test_data, test_meta_data = data_manipulation.merge_data_and_meta_naive(
        data1=test_data1, meta_data1=test_meta_data1, data2=test_data2, meta_data2=test_meta_data2)

    # test_meta_data_with_clusters = learning.get_cluster_for_meta(training_meta=meta_data, testing_meta=test_meta_data)
    # learning.test_with_data_and_meta(model=model, data=test_data, meta_data=test_meta_data_with_clusters)
    learning.predict(model=model, data=test_data, meta_data=test_meta_data)
Example #4
def explore_and_train_tbox(endpoint=None, model_id=None):
    if endpoint is None:
        print "explore_and_train_tbox> endpoint is None"
        return
    if model_id is None:
        print "explore_and_train_tbox> model_id should not be None"
        return
    try:
        update_progress_func = partial(update_model_progress_for_partial,
                                       model_id)
        update_model_state(
            model_id=model_id,
            new_state=MLModel.RUNNING,
            new_progress=0,
            new_notes="Extracting numerical class/property combinations")
        # Safe function
        classes_properties_uris = easysparql.get_all_classes_properties_numerical(
            endpoint=endpoint)
        update_model_state(
            model_id=model_id,
            new_progress=0,
            new_notes="extracting values from gathered class/property")
        data, meta_data = data_extraction.data_and_meta_from_class_property_uris(
            endpoint=endpoint,
            class_property_uris=classes_properties_uris,
            update_func=update_progress_func,
            isnumericfilter=True)
        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="training the model")
        if data is None:
            update_model_state(
                model_id=model_id,
                new_progress=0,
                new_state=MLModel.STOPPED,
                new_notes="No data is extracted from the endpoint")
            return
        if np.any(np.isnan(data)):
            print "explore_and_train_tbox> there is a nan in the data"
            print "**************************"
        else:
            print "explore_and_train_tbox> no nans in the data"
        model = learning.train_with_data_and_meta(
            data=data, meta_data=meta_data, update_func=update_progress_func)
        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="organizing the clusters")
        meta_with_clusters = learning.get_cluster_for_meta(
            training_meta=meta_data,
            testing_meta=meta_data,
            update_func=update_progress_func)

        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="Saving the model data")
        model_file_name = data_extraction.save_model(model=model,
                                                     meta_data=meta_data,
                                                     file_name=str(model_id) +
                                                     " - ")
        if model_file_name is not None:
            m = MLModel.objects.filter(id=model_id)
            if len(m) == 1:
                m = m[0]
                m.file_name = model_file_name
                m.save()
                update_model_state(model_id=model_id,
                                   new_progress=100,
                                   new_state=MLModel.COMPLETE,
                                   new_notes="Completed")
            else:
                update_model_state(model_id=model_id,
                                   new_progress=0,
                                   new_state=MLModel.STOPPED,
                                   new_notes="model is deleted")
        else:
            update_model_state(model_id=model_id,
                               new_progress=0,
                               new_state=MLModel.STOPPED,
                               new_notes="Error Saving the model")
    except Exception as e:
        print "explore_and_train_tbox> Exception %s" % str(e)
        traceback.print_exc()
        update_model_state(model_id=model_id,
                           new_state=MLModel.STOPPED,
                           new_notes="Not captured error: " + str(e))
Example #5
def main_with_local_files():
    data, meta_data = data_extraction.data_and_meta_from_files(get_local_dbpedia_files())
    model = learning.train_with_data_and_meta(data=data, meta_data=meta_data)
    meta_data_with_clusters = learning.get_cluster_for_meta(training_meta=meta_data, testing_meta=meta_data)
    learning.test_with_data_and_meta(model=model, data=data, meta_data=meta_data_with_clusters)
Example #6
def explore_and_train_abox(endpoint=None,
                           model_id=None,
                           classes_uris=[],
                           min_num_of_objects=90):
    if endpoint is None:
        print "explore_and_train_abox> endpoint is None"
        return
    if model_id is None:
        print "explore_and_train_abox> model_id should not be None"
        return
    try:
        update_progress_func = partial(update_model_progress_for_partial,
                                       model_id)
        update_model_state(
            model_id=model_id,
            new_state=MLModel.RUNNING,
            new_progress=0,
            new_notes="Extracting numerical class/property combinations")
        classes_properties_uris = []
        for idx, class_uri in enumerate(classes_uris):
            update_progress_func(int(idx * 1.0 / len(classes_uris) * 100))
            # properties = easysparql.get_numerical_properties_for_class_abox(endpoint=endpoint, class_uri=class_uri,
            #                                                                 raiseexception=True)
            # properties = easysparql.get_numerical_properties_for_class_abox_using_half_split(endpoint=endpoint,
            #                                                                                  class_uri=class_uri,
            #                                                                                  raiseexception=True,
            #                                                                                  lower_bound=1,
            #                                                                                  upper_bound=100000,
            #                                                                                  first_time=True)
            properties = easysparql.get_properties_for_class_abox(
                endpoint=endpoint, class_uri=class_uri, raiseexception=True)
            for prop in properties:
                classes_properties_uris.append((class_uri, prop))
        update_progress_func(100)
        update_model_state(
            model_id=model_id,
            new_progress=0,
            new_notes="extracting values from gathered class/property")
        data, meta_data = data_extraction.data_and_meta_from_class_property_uris(
            endpoint=endpoint,
            class_property_uris=classes_properties_uris,
            update_func=update_progress_func,
            isnumericfilter=False,
            min_num_of_objects=min_num_of_objects)
        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="training the model")
        if data is None:
            update_model_state(
                model_id=model_id,
                new_progress=0,
                new_state=MLModel.STOPPED,
                new_notes="No data is extracted from the endpoint")
            return
        if np.any(np.isnan(data)):
            print "explore_and_train_abox> there is a nan in the data"
            print "**************************"
        else:
            print "explore_and_train_abox> no nans in the data"
        model = learning.train_with_data_and_meta(
            data=data, meta_data=meta_data, update_func=update_progress_func)
        if model is None:
            update_model_state(model_id=model_id,
                               new_state=MLModel.STOPPED,
                               new_notes="leaning failed as model is None")
            return
        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="organizing the clusters")
        meta_with_clusters = learning.get_cluster_for_meta(
            training_meta=meta_data,
            testing_meta=meta_data,
            update_func=update_progress_func)
        # Now I'm not using the computed data here
        # update_model_state(model_id=model_id, new_progress=0, new_notes="computing the score of the trained model")
        # learning.test_with_data_and_meta(model=model, data=data, meta_data=meta_with_clusters,
        #                                  update_func=update_progress_func)
        update_model_state(model_id=model_id,
                           new_progress=0,
                           new_notes="Saving the model data")
        model_file_name = data_extraction.save_model(model=model,
                                                     meta_data=meta_data,
                                                     file_name=str(model_id) +
                                                     " - ")
        if model_file_name is not None:
            m = MLModel.objects.filter(id=model_id)
            if len(m) == 1:
                m = m[0]
                m.file_name = model_file_name
                m.save()
                update_model_state(model_id=model_id,
                                   new_progress=100,
                                   new_state=MLModel.COMPLETE,
                                   new_notes="Completed")
            else:
                update_model_state(model_id=model_id,
                                   new_progress=0,
                                   new_state=MLModel.STOPPED,
                                   new_notes="model is deleted")
        else:
            update_model_state(model_id=model_id,
                               new_progress=0,
                               new_state=MLModel.STOPPED,
                               new_notes="Error Saving the model")
    except Exception as e:
        print "explore_and_train_abox> Exception %s" % str(e)
        traceback.print_exc()
        update_model_state(model_id=model_id,
                           new_state=MLModel.STOPPED,
                           new_notes="Raised error: " + str(e))