Exemple #1
0
def construction_schema_objects(series):
    """Build the document-type query for one schema row.

       Parameters
       ==========
       series : row exposing ``id``, ``label`` and ``comment`` attributes

       Returns the ``doctype`` query. When ``series.id`` is a key of
       ``SIMPLE_TYPE_MAP`` the query additionally gets an ``<id>Value``
       property whose type is ``"xsd:"`` plus the mapped value.
    """
    type_id = series.id
    doc_query = WOQLQuery().doctype(type_id,
                                    label=series.label,
                                    description=series.comment)
    if type_id not in SIMPLE_TYPE_MAP:
        return doc_query
    return doc_query.property(type_id + "Value",
                              "xsd:" + SIMPLE_TYPE_MAP[type_id])
def construct_prop_dr(series):
    """Create the helper document types for multi-valued domains and ranges.

       Parameters
       ==========
       series : row exposing ``id``, ``domainIncludes`` and ``rangeIncludes``

       Returns a list with one ``doctype`` query named ``<id>Domain`` when
       ``domainIncludes`` is a comma-separated string, and one named
       ``<id>Range`` when ``rangeIncludes`` is; the list is empty otherwise.
    """
    result = []
    candidates = (
        (series.domainIncludes, "Domain"),
        (series.rangeIncludes, "Range"),
    )
    for includes, suffix in candidates:
        # isinstance (rather than an exact type comparison) also accepts str
        # subclasses; non-string cells are skipped.
        if isinstance(includes, str) and ',' in includes:
            result.append(WOQLQuery().doctype(series.id + suffix))
    return result
Exemple #3
0
def construct_simple_type_relations():
    """Build the ``subClassOf`` quads for the entries of ``SIMPLE_TYPE_MAP``.

       Keys mapped to ``"string"`` are made subclasses of
       ``http://schema.org/Thing`` and keys mapped to ``"dateTime"``
       subclasses of ``http://schema.org/DateTime``. A key is never made a
       subclass of itself. Returns the list of ``add_quad`` queries.
    """
    parent_by_value = {
        "string": "http://schema.org/Thing",
        "dateTime": "http://schema.org/DateTime",
    }
    relations = []
    for type_id, mapped in SIMPLE_TYPE_MAP.items():
        parent = parent_by_value.get(mapped)
        if parent is None or type_id == parent:
            continue
        relations.append(
            WOQLQuery().add_quad(type_id, "subClassOf", parent, "schema"))
    return relations
def generateMatchClause(code, type, i):
    """Build the match part for one code: an id and a label variable.

       The code is wrapped with ``WOQLQuery().string()`` explicitly; the
       original author noted this works around a string-conversion bug.

       Parameters
       ==========
       code : the value used both as id key and as label
       type : document type name, appended to ``"doc:"`` as id prefix
       i : index used to make the ``v:ID_<i>`` / ``v:Label_<i>`` names unique
    """
    suffix = str(i)
    make_id = WOQLQuery().idgen("doc:" + type,
                                [WOQLQuery().string(code)],
                                "v:ID_" + suffix)
    make_label = WOQLQuery().cast(WOQLQuery().string(code),
                                  "xsd:string",
                                  "v:Label_" + suffix)
    return WOQLQuery().woql_and(make_id, make_label)
def create_schema_add_ons(client, queries):
    """Combine groups of queries into one conjunction and execute it.

       Parameters
       ==========
       client : a WOQLClient() connection
       queries : iterable of query lists; empty lists are skipped, a
                 single-element list contributes its element as is, and a
                 longer list is first joined with ``woql_and``
    """
    combined = [
        group[0] if len(group) == 1 else WOQLQuery().woql_and(*group)
        for group in queries
        if len(group) >= 1
    ]
    return WOQLQuery().woql_and(*combined).execute(client)
def generateMultiInsertQuery(codes, type):
    """Build one ``when`` query that inserts a document per code.

       Parameters
       ==========
       codes : iterable of codes; each gets a match and an insert clause
               numbered by its position
       type : document type passed on to both clause builders
    """
    match_clauses = []
    insert_clauses = []
    for position, code in enumerate(codes):
        match_clauses.append(generateMatchClause(code, type, position))
        insert_clauses.append(generateInsertClause(code, type, position))
    condition = WOQLQuery().woql_and(*match_clauses)
    action = WOQLQuery().woql_and(*insert_clauses)
    return WOQLQuery().when(condition, action)
def load_airline(series, countries_list):
    """Build the insert query for one airline row.

       Parameters
       ==========
       series : row with 'Airline ID', 'Clean ID', 'Name' and 'Country'
       countries_list : collection of known cleaned country ids

       Returns None when 'Airline ID' is -1 or 'Clean ID' is missing,
       otherwise the insert query; ``registered_in`` is only set when the
       cleaned country id is found in ``countries_list``.
    """
    if series['Airline ID'] == -1:
        return None
    if pd.isna(series['Clean ID']):
        return None

    airline = WOQLQuery().insert(series['Clean ID'], 'Airline', label=series['Name'])
    country_id = _clean_id(series['Country'], prefix='country')
    if country_id in countries_list:
        airline.property('registered_in', "doc:" + country_id)
    return airline
Exemple #8
0
def construction_schema_addon(series, type_list):
    """Build the ``subClassOf`` quads linking a type to its known relatives.

       Parameters
       ==========
       series : row exposing ``id``, ``subTypes`` and ``subTypeOf``; the
                latter two are comma-separated strings when present
       type_list : iterable of the type ids that exist in the schema

       Returns a list of ``add_quad`` queries: one ``kid subClassOf id`` per
       known entry of ``subTypes`` and one ``id subClassOf parent`` per known
       entry of ``subTypeOf``. Entries not in ``type_list`` are ignored.
    """
    # Materialise the known ids once: membership tests become O(1) and a
    # one-shot iterable is not exhausted by the first lookup.
    known_types = set(type_list)
    result = []
    if isinstance(series.subTypes, str):
        for kid in series.subTypes.split(','):
            kid = kid.strip()
            if kid in known_types:
                result.append(WOQLQuery().add_quad(kid, "subClassOf", series.id, "schema"))
    if isinstance(series.subTypeOf, str):
        for parent in series.subTypeOf.split(','):
            parent = parent.strip()
            if parent in known_types:
                result.append(WOQLQuery().add_quad(series.id, "subClassOf", parent, "schema"))
    return result
def load_csvs(client, csvs):
    """Load every CSV into the database, one query execution per CSV.

       Parameters
       ==========
       client : a WOQLClient() connection
       csvs : a dict of all csvs to be input; only its values (the URLs)
              are used
    """
    for url in csvs.values():
        source = get_csv_variables(url)
        # Column bindings followed by the derived ids and values.
        bindings = WOQLQuery().woql_and(source, *get_wrangles())
        loader = WOQLQuery().when(bindings, get_inserts())
        loader.execute(client, f"Adding {url} into database")
def get_csv_variables(url):
    """Extracting the data from a CSV and binding it to variables

       Parameters
       ==========
       url : string, the URL of the CSV

       Returns the ``get`` query that maps the five CSV columns to WOQL
       variables and reads from ``url`` via ``remote``.
    """
    column_bindings = (
        ("councillor_a", "v:Rep_A"),
        ("councillor_b", "v:Rep_B"),
        ("party_a", "v:Party_A"),
        ("party_b", "v:Party_B"),
        ("distance", "v:Distance"),
    )
    columns = WOQLQuery()
    for header, variable in column_bindings:
        columns = columns.woql_as(header, variable)
    return WOQLQuery().get(columns).remote(url)
Exemple #11
0
def construction_schema_addon_property(series, type_list):
    """Build the quads declaring one object property with domain and range.

       Parameters
       ==========
       series : row exposing ``id``, ``domainIncludes`` and ``rangeIncludes``
       type_list : iterable of the type ids that exist in the schema

       Returns the list of ``add_quad`` queries, starting with the
       ``owl:ObjectProperty`` declaration, or an empty list when fewer than
       three quads were produced in total.
    """
    # Materialise the known ids once instead of once per membership test.
    known_types = set(type_list)
    result = [WOQLQuery().add_quad(series.id, "rdf:type", "owl:ObjectProperty", "schema")]
    result += _property_side_quads(series.id, series.domainIncludes,
                                   "domain", "Domain", known_types)
    result += _property_side_quads(series.id, series.rangeIncludes,
                                   "range", "Range", known_types)
    # The declaration alone, or with a single extra quad, is discarded.
    if len(result) < 3:
        return []
    return result


def _property_side_quads(prop_id, includes, predicate, suffix, known_types):
    """Return the quads for one side (domain or range) of a property."""
    if not isinstance(includes, str):
        return []
    if ',' not in includes:
        # Single class: point the property straight at it, if it is known.
        if includes in known_types:
            return [WOQLQuery().add_quad(prop_id, predicate, includes, "schema")]
        return []
    # Several classes: each known one becomes a subclass of the helper class
    # "<id><suffix>", and the property points at that helper class.
    helper_class = prop_id + suffix
    quads = []
    for member in includes.split(','):
        member = member.strip()
        if member in known_types:
            quads.append(WOQLQuery().add_quad(member, "subClassOf", helper_class, "schema"))
    quads.append(WOQLQuery().add_quad(prop_id, predicate, helper_class, "schema"))
    return quads
def create_schema(client):
    """Create the Region, Seats, House and Person document types.

       Parameters
       ==========
       client : a WOQLClient() connection

       Returns whatever executing the combined schema query returns.
    """
    region = WOQLQuery().doctype("Region", label="Region")
    seat = WOQLQuery().doctype("Seats", label="Seats")

    house = WOQLQuery().doctype("House", label="House")
    house_properties = (
        ("founder", "Person"),
        ("words", "string"),
        ("region", "Region"),
        ("heir", "Person"),
        ("overlord", "Person"),
        ("seats", "Seats"),
    )
    for prop_name, prop_type in house_properties:
        house = house.property(prop_name, prop_type)

    person = WOQLQuery().doctype("Person", label="Person")
    person_properties = (
        ("gender", "string"),
        ("father", "Person"),
        ("mother", "Person"),
        ("aliases", "string"),
        ("spouse", "Person"),
        ("children", "Person"),
    )
    for prop_name, prop_type in person_properties:
        person = person.property(prop_name, prop_type)

    schema = WOQLQuery().woql_and(region, seat, house, person)
    return schema.execute(client, "Create schma for Game of Thrones.")
def load_flight(series, airports, airlines):
    """Build the insert query for one route row.

       Parameters
       ==========
       series : row with 'Airline Code', 'Source Airport ID' and
                'Destination Airport ID'
       airports : table with 'IATA', 'ICAO' and 'Clean ID' columns
       airlines : table with 'IATA', 'ICAO' and 'Clean ID' columns

       The code length picks the lookup column (airports: 3 -> IATA,
       4 -> ICAO; airlines: 2 -> IATA, 3 -> ICAO). A link property is only
       added when the code has a supported length and a matching row exists;
       the first matching row's 'Clean ID' is used.
    """
    airline_code = series['Airline Code']
    source = series['Source Airport ID']
    destination = series['Destination Airport ID']

    query_obj = WOQLQuery().insert(
        f"{airline_code}_{source}_{destination}",
        'Flight',
        label=f"Flight by {airline_code} from {source} to {destination}",
    )

    airport_columns = {3: 'IATA', 4: 'ICAO'}
    airline_columns = {2: 'IATA', 3: 'ICAO'}
    links = (
        ('departs', source, airports, airport_columns),
        ('arrives', destination, airports, airport_columns),
        ('operated_by', airline_code, airlines, airline_columns),
    )
    for prop, code, table, columns_by_length in links:
        column = columns_by_length.get(len(code))
        if column is None:
            continue
        matches = table[column] == code
        if matches.any():
            target_id = table[matches]['Clean ID'].iloc[0]
            query_obj.property(prop, "doc:" + target_id)

    return query_obj
Exemple #14
0
def get_wrangles():
    """Return the list of queries deriving ids and values from CSV variables.

       In order: id generation for both parties and both representatives,
       the cast of ``v:Distance`` to ``xsd:decimal``, id generation for the
       similarity relation, and the concatenation producing its label.
    """
    id_rules = (
        ("doc:Party", ["v:Party_A"], "v:Party_A_ID"),
        ("doc:Party", ["v:Party_B"], "v:Party_B_ID"),
        ("doc:Representative", ["v:Rep_A"], "v:Rep_A_ID"),
        ("doc:Representative", ["v:Rep_B"], "v:Rep_B_ID"),
    )
    steps = [WOQLQuery().idgen(prefix, keys, target)
             for prefix, keys, target in id_rules]
    steps.append(WOQLQuery().typecast("v:Distance", "xsd:decimal", "v:Similarity"))
    steps.append(WOQLQuery().idgen("doc:Similarity", ["v:Rep_A", "v:Rep_B"], "v:Rel_ID"))
    steps.append(WOQLQuery().concat("v:Rep_A similarity v:Distance to v:Rep_B", "v:Rel_Label"))
    return steps
def load_country(series):
    """Build the insert query for one country row.

       Parameters
       ==========
       series : row with 'Country ID', 'Name', 'ISO Code' and 'FIP Code'

       Returns None when 'Country ID' is missing; otherwise the insert query,
       with ``iso_code`` / ``fip_code`` set only for codes that are present.
    """
    country_id = series['Country ID']
    if pd.isna(country_id):
        return None

    country = WOQLQuery().insert(country_id, 'Country', label=series['Name'])
    for prop, column in (('iso_code', 'ISO Code'), ('fip_code', 'FIP Code')):
        value = series[column]
        if pd.notna(value):
            country.property(prop, value)
    return country
Exemple #16
0
def get_inserts():
    """Return the conjunction of insert queries for one CSV row.

       Two Party documents, two Representative documents (each with a
       ``member_of`` link to its party) and one Similarity document that
       links both representatives and carries the similarity value.
    """
    party_a = WOQLQuery().insert("v:Party_A_ID", "Party", label="v:Party_A")
    party_b = WOQLQuery().insert("v:Party_B_ID", "Party", label="v:Party_B")

    rep_a = WOQLQuery().insert("v:Rep_A_ID", "Representative", label="v:Rep_A")
    rep_a = rep_a.property("member_of", "v:Party_A_ID")
    rep_b = WOQLQuery().insert("v:Rep_B_ID", "Representative", label="v:Rep_B")
    rep_b = rep_b.property("member_of", "v:Party_B_ID")

    relation = WOQLQuery().insert("v:Rel_ID", "Similarity", label="v:Rel_Label")
    for prop, value in (("similar_to", "v:Rep_A_ID"),
                        ("similar_to", "v:Rep_B_ID"),
                        ("similarity", "v:Similarity")):
        relation = relation.property(prop, value)

    return WOQLQuery().woql_and(party_a, party_b, rep_a, rep_b, relation)
Exemple #17
0
def create_schema(client):
    """The query which creates the schema

       Builds the Party, Representative and Similarity document types,
       joins them with ``woql_and`` and executes the result.

       Parameters
       ==========
       client : a WOQLClient() connection
    """
    party = WOQLQuery().doctype("Party", label="Party", description="Political Party")

    representative = WOQLQuery().doctype(
        "Representative",
        label="Representative",
        description="An elected member Dublin city council")
    representative = representative.property(
        "member_of", "Party", label="Member of").cardinality(1)

    similarity = WOQLQuery().doctype("Similarity", label="Similarity")
    similarity = similarity.property("similarity", "decimal", label="Similarity")
    similarity = similarity.property(
        "similar_to", "Representative", label="Similar To").cardinality(2)

    schema = WOQLQuery().woql_and(party, representative, similarity)
    return schema.execute(client, "Creating schema for Dublin voting data")
def extract_data(data, id='event/'):
    """Recursively crawl the data and queue one insert query per node.

       Parameters
       ==========
       data : either a dict with a 'type' key plus 'properties' (or 'value'
              for ``http://schema.org/DateTime`` nodes), or a plain string
       id : path-like id of the current node; children get ``id + prop + '/'``

       Nothing is returned: every query is appended to the module-level
       ``execution_queue``. Children are queued before their parent.
    """
    if not isinstance(data, dict):
        # Leaf string: anything containing '://' is stored as a URL,
        # everything else as Text.
        if '://' in data:
            data_type = 'http://schema.org/URL'
        else:
            data_type = 'http://schema.org/Text'
        WOQLObj = WOQLQuery().insert('doc:'+id, data_type)
        data_obj = {"@value" : data, "@type" : "xsd:string"}
        execution_queue.append(WOQLObj.property(data_type+'Value', data_obj))
        return

    data_type = data['type']
    WOQLObj = WOQLQuery().insert('doc:'+id, data_type)
    if data_type == 'http://schema.org/DateTime':
        # DateTime nodes carry a literal value instead of nested properties.
        date_value = {"@value" : data['value'], "@type" : "xsd:dateTime"}
        execution_queue.append(WOQLObj.property(data_type+'Value', date_value))
        return
    for prop in data['properties']:
        child_id = id+prop+'/'
        extract_data(data['properties'][prop], child_id)
        WOQLObj = WOQLObj.property('http://schema.org/'+prop, 'doc:'+child_id)
    execution_queue.append(WOQLObj)

# Queue the insert queries for the first microdata item; extract_data appends
# them to execution_queue. (`data` is defined outside this view.)
extract_data(data['microdata'][0])

# Connect to the local server, then create the database when no metadata is
# found for it; otherwise call client.db(db_id) (presumably selects the
# existing database - confirm against the client documentation).
db_id = "schema_tutorial"
client = WOQLClient(server_url = "http://localhost:6363")
client.connect(key="root", account="admin", user="******")
existing = client.get_metadata(db_id, client.uid())
if not existing:
    client.create_database(db_id, "admin", { "label": "Schema.org Graph", "comment": "Create a graph with Schema.org data"})
else:
    client.db(db_id)
# Run every queued insert as a single conjunction.
WOQLQuery().woql_and(*execution_queue).execute(client)
def create_schema_objects(client, queries):
    """Join all queries with ``woql_and`` and execute the result.

       Parameters
       ==========
       client : a WOQLClient() connection
       queries : iterable of queries, unpacked as arguments of ``woql_and``
    """
    return WOQLQuery().woql_and(*queries).execute(client)
def generateInsertClause(code, type, i):
    """Build the insert part for one code, wrapped in a ``woql_and``.

       Parameters
       ==========
       code : not used here; kept so the signature matches the match clause
       type : document type of the inserted document
       i : index selecting the ``v:ID_<i>`` / ``v:Label_<i>`` variables
    """
    suffix = str(i)
    new_document = WOQLQuery().insert("v:ID_" + suffix,
                                      type,
                                      label="v:Label_" + suffix)
    return WOQLQuery().woql_and(new_document)
def load_data(client, houses, characters):
    """Insert all characters, regions, seats and houses in one query.

       Parameters
       ==========
       client : a WOQLClient() connection
       houses : iterable of dicts with 'Id', 'Name', 'Region', 'Seats',
                'Founder', 'Words', 'Heir' and 'Overlord'
       characters : iterable of dicts with 'Id', 'Name', 'IsFemale',
                    'Father', 'Mother', 'Spouse', 'Children' and 'Aliases'

       Returns whatever executing the combined insert query returns.
    """
    queries = []

    for person in characters:
        person_query = WOQLQuery().insert("Person_" + str(person["Id"]), "Person")
        # An empty name is stored as "Unknown".
        person_query.label("Unknown" if len(person["Name"]) == 0 else person["Name"])
        gender = "Female" if person["IsFemale"] else "Male"
        person_query.property("gender", WOQLQuery().string(gender))
        for prop, key in (("father", "Father"), ("mother", "Mother"), ("spouse", "Spouse")):
            relative = person[key]
            if relative is not None:
                person_query.property(prop, "doc:Person_" + str(relative))
        for child in person["Children"]:
            person_query.property("children", "doc:Person_" + str(child))
        for alias in person["Aliases"]:
            person_query.property("aliases", WOQLQuery().string(alias))
        queries.append(person_query)

    for house in houses:
        region = house["Region"]
        seats = house["Seats"]

        # Region and seat documents are queued ahead of the house itself.
        if region is not None:
            queries.append(WOQLQuery().insert("Region_" + region, "Region", label=region))
        for seat in seats:
            queries.append(WOQLQuery().insert("Seats_" + seat, "Seats", label=seat))

        house_query = WOQLQuery().insert("hus" + str(house["Id"]), "House").label(house["Name"])
        if region is not None:
            house_query.property("region", "doc:Region_" + region)
        for seat in seats:
            house_query.property("seats", "doc:Seats_" + seat)
        if house["Founder"] is not None:
            house_query.property("founder", "doc:Person_" + str(house["Founder"]))
        if house["Words"] is not None:
            house_query.property("words", {"@value" : house["Words"], "@type" : "xsd:string"})
        for prop, key in (("heir", "Heir"), ("overlord", "Overlord")):
            holder = house[key]
            if holder is not None:
                house_query.property(prop, "doc:Person_" + str(holder))
        queries.append(house_query)

    combined = WOQLQuery().woql_and(*queries)
    return combined.execute(client, "Adding data for Game of Thrones.")
def create_schema(client):
    """The query which creates the schema

        It uses variables rather than the fluent style as an example: each
        class is built in its own variable, then all are joined with
        ``woql_and`` and executed.

        Parameters
        ==========
        client : a WOQLClient() connection

    """
    base = WOQLQuery().doctype("EphemeralEntity",
                               label="Ephemeral Entity",
                               description="An entity that has a lifespan")
    base.property("lifespan_start", "dateTime", label="Existed From")
    base.property("lifespan_end", "dateTime", label="Existed To")

    country = WOQLQuery().add_class("Country").label("Country").description(
        "A nation state").parent("EphemeralEntity")
    country.property("iso_code", "string", label="ISO Code")
    country.property("fip_code", "string", label="FIP Code")

    airline = WOQLQuery().add_class("Airline").label("Airline").description(
        "An operator of airplane flights").parent("EphemeralEntity")
    # Keep this a plain assignment: a trailing comma here would rebind
    # ``airline`` to a one-element tuple and hand that tuple to woql_and.
    airline = airline.property("registered_in",
                               "scm:Country",
                               label="Registered In")

    airport = WOQLQuery().add_class("Airport").label("Airport").description(
        "An airport where flights terminate").parent("EphemeralEntity")
    airport.property("situated_in", "Country", label="Situated In")

    flight = WOQLQuery().add_class("Flight").label("Flight").description(
        "A flight between airports").parent("EphemeralEntity")
    flight.property("departs", "Airport", label="Departs")
    flight.property("arrives", "Airport", label="Arrives")
    flight.property("operated_by", "Airline", label="Operated By")

    schema = WOQLQuery().woql_and(base, country, airline, airport, flight)
    return schema.execute(client, "Creating schema for flight data")


# Build one flight query per row of `routes` (axis=1 applies load_flight
# row-wise) and drop missing results. `routes`, `airports` and `airlines`
# are defined outside this view.
flights_query = routes.apply(load_flight,
                             axis=1,
                             airports=airports,
                             airlines=airlines).dropna()

# Connect to the local server, then create the database when no metadata is
# found for it; otherwise call client.db(db_id) (presumably selects the
# existing database - confirm against the client documentation).
db_id = "pyplane"
client = WOQLClient(server_url="http://localhost:6363")
client.connect(key="root", account="admin", user="******")
existing = client.get_metadata(db_id, client.uid())
if not existing:
    client.create_database(db_id,
                           "admin",
                           label="Flight Graph",
                           description="Create a graph with Open Flights data")
else:
    client.db(db_id)
# Insert each data set as its own conjunction, in this order: countries,
# airlines, airports, flights. The *_query collections other than
# flights_query are built outside this view.
WOQLQuery().woql_and(*countries_query).execute(client, "Insert countries data")
WOQLQuery().woql_and(*airlines_query).execute(client, "Insert airlines data")
WOQLQuery().woql_and(*airports_query).execute(client, "Insert airports data")
WOQLQuery().woql_and(*flights_query).execute(client, "Insert flights data")