def obtainTypeLabels(credentials=None, maxLabels=10):
    """Return ids of Freebase types, skipping a fixed set of id prefixes.

    Sends an MQL query for objects of type ``/type/type`` through
    ``fb.readBatch`` and collects the ``value`` of the first ``id`` entry of
    every result.  Ids that start with ``/type``, ``/freebase``,
    ``/measurement_unit`` or ``/common`` are then discarded.

    Args:
        credentials: Passed through unchanged to ``fb.readBatch``.
        maxLabels: Passed to ``fb.readBatch`` as ``maxResults``; also used,
            capped at 100, as the ``limit`` field of the query.

    Returns:
        A list of type id strings in the order they were delivered.  The
        prefix filter runs after the fetch, so the list may hold fewer than
        ``maxLabels`` entries.
    """
    # NOTE(review): the nested key/namespace clause presumably restricts the
    # match to types that live directly inside a top-level domain -- confirm
    # against the MQL documentation.
    query = [{
        'type': '/type/type',
        'key': [{'namespace': [{'key': [{'namespace': '/'}]}]}],
        'id': [{}],
        'name': [{}],
        'limit': min(100, maxLabels),
    }]

    #
    # Ask Freebase for the list of types
    #
    typeLabels = []

    def typeLabelBuilder(resultList):
        for result in resultList:
            typeLabels.append(result["id"][0]["value"])

    fb.readBatch(query, typeLabelBuilder, credentials, maxResults=maxLabels)

    # str.startswith accepts a tuple, so one pass replaces four chained
    # filter() calls.  Building a real list keeps the return type a list on
    # Python 3, where filter() would hand back a lazy iterator.
    excludedPrefixes = ("/type", "/freebase", "/measurement_unit", "/common")
    return [label for label in typeLabels
            if not label.startswith(excludedPrefixes)]
def populateSchemaTypesFile(schemaElts, schemaNamesFile, schemaTypesFile, credentials):
    """Resolve a name and a type for each schema element and save both lists.

    Every element of ``schemaElts`` yields exactly one entry in each of the
    two result lists:

    * An element that does not start with ``"/"`` is kept as its own name
      with type ``"string"``; no query is issued for it.
    * Any other element is looked up through ``fb.readBatch`` as the id of a
      ``/type/property`` object.  Its name is the returned ``name`` (``""``
      when that is ``None``).  Its type is the ``id`` of the first
      ``expected_type`` entry when that id starts with ``"/type/"``, and
      ``"string"`` otherwise.
    * If the lookup delivers no result, both entries are ``""``.

    The two lists are then written, tab-separated, to ``schemaNamesFile`` and
    ``schemaTypesFile`` so future runs can benefit.

    Args:
        schemaElts: Iterable of schema element strings.
        schemaNamesFile: Path of the file that receives the names.
        schemaTypesFile: Path of the file that receives the types.
        credentials: Passed through unchanged to ``fb.readBatch``.

    Returns:
        The pair ``(schemaNames, schemaTypes)``, two lists of equal length.
    """
    schemaNames = []
    schemaTypes = []

    for schemaElt in schemaElts:
        if not schemaElt.startswith("/"):
            schemaNames.append(schemaElt)
            schemaTypes.append("string")
            continue

        # Grab type info for the given named property.  We're interested in
        # the human-readable 'name' and the 'expected_type' structure.
        query = [{
            "type": "/type/property",
            "id": schemaElt,
            "name": None,
            "expected_type": [{}],
        }]
        results = []
        fb.readBatch(query, results.extend, credentials, maxResults=1)

        if not results:
            schemaNames.append("")
            schemaTypes.append("")
            continue

        r = results[0]
        # A property without a name would otherwise put None into the list
        # and make the "\t".join below raise TypeError.
        schemaNames.append(r["name"] or "")

        stName = "string"
        expectedTypes = r["expected_type"]
        if expectedTypes:
            propertyDataType = expectedTypes[0]["id"]
            # Only ids under /type/ are recorded verbatim; every other
            # expected type is treated as a plain string.
            if propertyDataType.startswith("/type/"):
                stName = propertyDataType
        schemaTypes.append(stName)

    # Now record the data so future runs can benefit
    with open(schemaNamesFile, "w") as f:
        f.write("\t".join(schemaNames))
    with open(schemaTypesFile, "w") as f:
        f.write("\t".join(schemaTypes))

    return schemaNames, schemaTypes
def populateSchemaTypesFile(schemaElts, schemaNamesFile, schemaTypesFile, credentials):
    """Resolve a name and a type for each schema element and save both lists.

    Every element of ``schemaElts`` yields exactly one entry in each of the
    two result lists:

    * An element that does not start with ``"/"`` is kept as its own name
      with type ``"string"``; no query is issued for it.
    * Any other element is looked up through ``fb.readBatch`` as the id of a
      ``/type/property`` object.  Its name is the returned ``name`` (``""``
      when that is ``None``).  Its type is the ``id`` of the first
      ``expected_type`` entry when that id starts with ``"/type/"``, and
      ``"string"`` otherwise.
    * If the lookup delivers no result, both entries are ``""``.

    The two lists are then written, tab-separated, to ``schemaNamesFile`` and
    ``schemaTypesFile`` so future runs can benefit.

    Args:
        schemaElts: Iterable of schema element strings.
        schemaNamesFile: Path of the file that receives the names.
        schemaTypesFile: Path of the file that receives the types.
        credentials: Passed through unchanged to ``fb.readBatch``.

    Returns:
        The pair ``(schemaNames, schemaTypes)``, two lists of equal length.
    """
    schemaNames = []
    schemaTypes = []

    for schemaElt in schemaElts:
        if not schemaElt.startswith("/"):
            schemaNames.append(schemaElt)
            schemaTypes.append("string")
            continue

        # Grab type info for the given named property.  We're interested in
        # the human-readable 'name' and the 'expected_type' structure.
        query = [{
            "type": "/type/property",
            "id": schemaElt,
            "name": None,
            "expected_type": [{}],
        }]
        results = []
        fb.readBatch(query, results.extend, credentials, maxResults=1)

        if not results:
            schemaNames.append("")
            schemaTypes.append("")
            continue

        r = results[0]
        # A property without a name would otherwise put None into the list
        # and make the "\t".join below raise TypeError.
        schemaNames.append(r["name"] or "")

        stName = "string"
        expectedTypes = r["expected_type"]
        if expectedTypes:
            propertyDataType = expectedTypes[0]["id"]
            # Only ids under /type/ are recorded verbatim; every other
            # expected type is treated as a plain string.
            if propertyDataType.startswith("/type/"):
                stName = propertyDataType
        schemaTypes.append(stName)

    # Now record the data so future runs can benefit
    with open(schemaNamesFile, "w") as f:
        f.write("\t".join(schemaNames))
    with open(schemaTypesFile, "w") as f:
        f.write("\t".join(schemaTypes))

    return schemaNames, schemaTypes
def obtainTypeLabels(credentials=None, maxLabels=10):
    """Return ids of Freebase types, skipping a fixed set of id prefixes.

    Sends an MQL query for objects of type ``/type/type`` through
    ``fb.readBatch`` and collects the ``value`` of the first ``id`` entry of
    every result.  Ids that start with ``/type``, ``/freebase``,
    ``/measurement_unit`` or ``/common`` are then discarded.

    Args:
        credentials: Passed through unchanged to ``fb.readBatch``.
        maxLabels: Passed to ``fb.readBatch`` as ``maxResults``; also used,
            capped at 100, as the ``limit`` field of the query.

    Returns:
        A list of type id strings in the order they were delivered.  The
        prefix filter runs after the fetch, so the list may hold fewer than
        ``maxLabels`` entries.
    """
    # NOTE(review): the nested key/namespace clause presumably restricts the
    # match to types that live directly inside a top-level domain -- confirm
    # against the MQL documentation.
    query = [{
        'type': '/type/type',
        'key': [{'namespace': [{'key': [{'namespace': '/'}]}]}],
        'id': [{}],
        'name': [{}],
        'limit': min(100, maxLabels),
    }]

    #
    # Ask Freebase for the list of types
    #
    typeLabels = []

    def typeLabelBuilder(resultList):
        for result in resultList:
            typeLabels.append(result["id"][0]["value"])

    fb.readBatch(query, typeLabelBuilder, credentials, maxResults=maxLabels)

    # str.startswith accepts a tuple, so one pass replaces four chained
    # filter() calls.  Building a real list keeps the return type a list on
    # Python 3, where filter() would hand back a lazy iterator.
    excludedPrefixes = ("/type", "/freebase", "/measurement_unit", "/common")
    return [label for label in typeLabels
            if not label.startswith(excludedPrefixes)]
def obtainDatabase(typeLabel, credentials=None):
    """Fetch the property schema and sample instances of one Freebase type.

    Two MQL queries are sent through ``fb.readBatch``:

    1. A ``/type/type`` query for ``typeLabel`` that lists the type's
       properties.
    2. A query for instances of ``typeLabel`` that asks for ``name`` plus
       every property id discovered in step 1.

    Each instance becomes one tuple of strings.  The first cell is the
    instance name; each following cell is taken from the first entry of the
    matching property's value list, using that entry's ``name`` if the key is
    present and its ``value`` otherwise.  ``None`` values, empty value lists,
    and text for which ``str()`` raises ``UnicodeEncodeError`` all become
    ``""``.

    Args:
        typeLabel: Id of the type to inspect, used in both queries.
        credentials: Passed through unchanged to ``fb.readBatch``.

    Returns:
        The triple ``(typeLabel, schema, tuples)``.  ``schema`` is ``"name"``
        followed by the discovered property ids; ``tuples`` is a list of
        tuples whose cells line up with ``schema``.  Both are empty lists
        when the first query delivers nothing.
    """

    def cellText(value):
        # Single conversion rule shared by the name cell and property cells.
        if value is None:
            return ""
        try:
            return str(value)
        except UnicodeEncodeError:
            return ""

    #
    # Using a MQL query, grab the schema for the given typeLabel
    #
    typeDetailQuery = [{
        'type': '/type/type',
        'id': typeLabel,
        'name': [],
        '/type/type/properties': [{
            'id': None,
            'type': [],
            'name': None,
            'expected_type': None,
        }],
        'guid': None,
        'limit': 1,
    }]

    #
    # Ask Freebase for metadata about the indicated type
    #
    typeProfiles = []
    fb.readBatch(typeDetailQuery, typeProfiles.extend, credentials, maxResults=1)

    if not typeProfiles:
        return typeLabel, [], []

    # Only the first discovered type is used.
    properties = typeProfiles[0]["/type/type/properties"]

    #
    # Create a query based on the type-specific profile and
    # grab the type-appropriate data
    #
    typeSpecificQuery = {
        'type': typeLabel,
        'name': None,
        'id': None,
        'guid': None,
        'limit': 1,
    }
    schema = ["name"]
    for discoveredProperty in properties:
        schema.append(discoveredProperty["id"])
        typeSpecificQuery[discoveredProperty["id"]] = [{}]
        # Debug trace kept from the original; this form prints the same text
        # on Python 2 and Python 3.
        print("DP %s" % (discoveredProperty,))

    #
    # Send the query to Freebase
    #
    # NOTE(review): the query carries 'limit': 1 while up to 10 results are
    # requested; how the two interact depends on fb.readBatch -- confirm.
    typeInstances = []
    fb.readBatch([typeSpecificQuery], typeInstances.extend, credentials, maxResults=10)

    #
    # Dump the sample data for this type.  Equivalent to a relation.
    #
    tuples = []
    for elt in typeInstances:
        newTuple = [cellText(elt["name"])]
        for p in properties:
            valueList = elt[p["id"]]
            if not valueList:
                newTuple.append("")
                continue
            first = valueList[0]
            # Prefer the 'name' field when the entry has one; fall back to
            # 'value' otherwise.
            field = "name" if "name" in first else "value"
            newTuple.append(cellText(first[field]))
        tuples.append(tuple(newTuple))

    #
    # Return schema info and the sample results
    #
    return typeLabel, schema, tuples
def obtainDatabase(typeLabel, credentials=None):
    """Fetch the property schema and sample instances of one Freebase type.

    Two MQL queries are sent through ``fb.readBatch``:

    1. A ``/type/type`` query for ``typeLabel`` that lists the type's
       properties.
    2. A query for instances of ``typeLabel`` that asks for ``name`` plus
       every property id discovered in step 1.

    Each instance becomes one tuple of strings.  The first cell is the
    instance name; each following cell is taken from the first entry of the
    matching property's value list, using that entry's ``name`` if the key is
    present and its ``value`` otherwise.  ``None`` values, empty value lists,
    and text for which ``str()`` raises ``UnicodeEncodeError`` all become
    ``""``.

    Args:
        typeLabel: Id of the type to inspect, used in both queries.
        credentials: Passed through unchanged to ``fb.readBatch``.

    Returns:
        The triple ``(typeLabel, schema, tuples)``.  ``schema`` is ``"name"``
        followed by the discovered property ids; ``tuples`` is a list of
        tuples whose cells line up with ``schema``.  Both are empty lists
        when the first query delivers nothing.
    """

    def cellText(value):
        # Single conversion rule shared by the name cell and property cells.
        if value is None:
            return ""
        try:
            return str(value)
        except UnicodeEncodeError:
            return ""

    #
    # Using a MQL query, grab the schema for the given typeLabel
    #
    typeDetailQuery = [{
        'type': '/type/type',
        'id': typeLabel,
        'name': [],
        '/type/type/properties': [{
            'id': None,
            'type': [],
            'name': None,
            'expected_type': None,
        }],
        'guid': None,
        'limit': 1,
    }]

    #
    # Ask Freebase for metadata about the indicated type
    #
    typeProfiles = []
    fb.readBatch(typeDetailQuery, typeProfiles.extend, credentials, maxResults=1)

    if not typeProfiles:
        return typeLabel, [], []

    # Only the first discovered type is used.
    properties = typeProfiles[0]["/type/type/properties"]

    #
    # Create a query based on the type-specific profile and
    # grab the type-appropriate data
    #
    typeSpecificQuery = {
        'type': typeLabel,
        'name': None,
        'id': None,
        'guid': None,
        'limit': 1,
    }
    schema = ["name"]
    for discoveredProperty in properties:
        schema.append(discoveredProperty["id"])
        typeSpecificQuery[discoveredProperty["id"]] = [{}]
        # Debug trace kept from the original; this form prints the same text
        # on Python 2 and Python 3.
        print("DP %s" % (discoveredProperty,))

    #
    # Send the query to Freebase
    #
    # NOTE(review): the query carries 'limit': 1 while up to 10 results are
    # requested; how the two interact depends on fb.readBatch -- confirm.
    typeInstances = []
    fb.readBatch([typeSpecificQuery], typeInstances.extend, credentials, maxResults=10)

    #
    # Dump the sample data for this type.  Equivalent to a relation.
    #
    tuples = []
    for elt in typeInstances:
        newTuple = [cellText(elt["name"])]
        for p in properties:
            valueList = elt[p["id"]]
            if not valueList:
                newTuple.append("")
                continue
            first = valueList[0]
            # Prefer the 'name' field when the entry has one; fall back to
            # 'value' otherwise.
            field = "name" if "name" in first else "value"
            newTuple.append(cellText(first[field]))
        tuples.append(tuple(newTuple))

    #
    # Return schema info and the sample results
    #
    return typeLabel, schema, tuples