Esempio n. 1
0
def obtainTypeLabels(credentials=None, maxLabels=10):
    """Fetch Freebase type ids, skipping a fixed set of id prefixes.

    An MQL query for objects of type ``/type/type`` is sent through
    ``fb.readBatch``; the first ``id`` value of every result is collected.
    Ids beginning with ``/type``, ``/freebase``, ``/measurement_unit`` or
    ``/common`` are then discarded.

    Args:
        credentials: Handed unchanged to ``fb.readBatch``.
        maxLabels: Passed to ``fb.readBatch`` as ``maxResults``; the
            per-query ``limit`` is capped at 100.  Filtering happens after
            the fetch, so fewer than ``maxLabels`` ids may come back.

    Returns:
        list: The surviving type ids, in the order they were received.
        Always a real list (the previous chained ``filter`` calls produced
        a lazy iterator on Python 3).
    """
    # str.startswith accepts a tuple, so one pass replaces four filters.
    excludedPrefixes = ("/type", "/freebase", "/measurement_unit", "/common")

    # NOTE(review): the nested key/namespace clause presumably restricts the
    # match to types that live directly under a top-level domain -- confirm
    # against the MQL documentation.
    query = [{
        'type': '/type/type',
        'key': [{
            'namespace': [{
                'key': [{
                    'namespace': '/'
                }]
            }]
        }],
        'id': [{}],
        'name': [{}],
        'limit': min(100, maxLabels)
    }]

    #
    # Ask Freebase for the type ids
    #
    collected = []

    def typeLabelBuilder(resultList):
        for result in resultList:
            collected.append(result["id"][0]["value"])

    fb.readBatch(query, typeLabelBuilder, credentials, maxResults=maxLabels)

    return [label for label in collected
            if not label.startswith(excludedPrefixes)]
def populateSchemaTypesFile(schemaElts, schemaNamesFile, schemaTypesFile,
                            credentials):
    """Resolve a name and a data type per schema element and cache both.

    Elements that do not start with "/" are used verbatim as their own name
    and typed as "string".  Every other element is looked up through
    ``fb.readBatch`` as the id of a ``/type/property`` object:

    * no result: both the name and the type are recorded as "".
    * otherwise the name is the property's ``name`` ("" when that is
      ``None``) and the type is the id of the first ``expected_type`` when
      that id starts with "/type/", else "string".

    Both lists are written tab-separated, without a trailing newline, to
    ``schemaNamesFile`` and ``schemaTypesFile`` so that later runs can reuse
    them.

    Args:
        schemaElts: Iterable of schema element strings.
        schemaNamesFile: Path of the file that receives the names.
        schemaTypesFile: Path of the file that receives the types.
        credentials: Handed unchanged to ``fb.readBatch``.

    Returns:
        tuple: ``(schemaNames, schemaTypes)``, two lists parallel to
        ``schemaElts``.
    """
    schemaNames = []
    schemaTypes = []

    # Iterate through all the schema elements.  These are all identifiers
    # for objects of type "/type/property"
    for schemaElt in schemaElts:
        if not schemaElt.startswith("/"):
            schemaNames.append(schemaElt)
            schemaTypes.append("string")
            continue

        # Grab type info for the given named property.  We're interested in
        # the human-readable 'name' and the 'expected_type' structure
        query = [{
            "type": "/type/property",
            "id": schemaElt,
            "name": None,
            "expected_type": [{}]
        }]
        results = []
        # The bound method ties the callback to this iteration's list.
        fb.readBatch(query, results.extend, credentials, maxResults=1)

        if not results:
            schemaNames.append("")
            schemaTypes.append("")
            continue

        r = results[0]
        # A property without a name would otherwise put None in the list
        # and make the "\t".join below raise TypeError.
        name = r["name"]
        schemaNames.append("" if name is None else name)

        stName = "string"
        expectedTypes = r["expected_type"]
        if expectedTypes:
            propertyDataType = expectedTypes[0]["id"]
            if propertyDataType.startswith("/type/"):
                stName = propertyDataType
        schemaTypes.append(stName)

    # Now record the data so future runs can benefit
    with open(schemaNamesFile, "w") as f:
        f.write("\t".join(schemaNames))

    with open(schemaTypesFile, "w") as f:
        f.write("\t".join(schemaTypes))

    # All done
    return schemaNames, schemaTypes
def populateSchemaTypesFile(schemaElts, schemaNamesFile, schemaTypesFile, credentials):
  """Look up a name and data type for each schema element and save them.

  An element that does not begin with "/" is kept as its own name with the
  type "string".  Any other element is treated as the id of a
  ``/type/property`` object and queried via ``fb.readBatch``:

  * when nothing comes back, "" is stored for both name and type;
  * otherwise the property's ``name`` is stored ("" if it is ``None``), and
    the type is the first ``expected_type`` id if that id starts with
    "/type/", or "string" if not.

  The two resulting lists are written, tab-joined and without a trailing
  newline, to ``schemaNamesFile`` and ``schemaTypesFile``.

  Args:
    schemaElts: Iterable of schema element strings.
    schemaNamesFile: Path the names are written to.
    schemaTypesFile: Path the types are written to.
    credentials: Forwarded untouched to ``fb.readBatch``.

  Returns:
    tuple: ``(schemaNames, schemaTypes)``, lists parallel to ``schemaElts``.
  """
  schemaNames = []
  schemaTypes = []

  # Iterate through all the schema elements.  These are all identifiers for objects of type "/type/property"
  for schemaElt in schemaElts:
    if not schemaElt.startswith("/"):
      schemaNames.append(schemaElt)
      schemaTypes.append("string")
      continue

    # Grab type info for the given named property.  We're interested in the human-readable 'name'
    # and the 'expected_type' structure
    query = [{"type": "/type/property",
              "id": schemaElt,
              "name": None,
              "expected_type": [{}]
              }]
    results = []
    # Passing the bound method keeps the callback tied to this iteration's list.
    fb.readBatch(query, results.extend, credentials, maxResults=1)

    if not results:
      schemaNames.append("")
      schemaTypes.append("")
      continue

    r = results[0]
    # Guard against a nameless property: None in the list would make the
    # "\t".join below raise TypeError.
    propertyName = r["name"]
    if propertyName is None:
      propertyName = ""
    schemaNames.append(propertyName)

    stName = "string"
    if r["expected_type"]:
      propertyDataType = r["expected_type"][0]["id"]
      if propertyDataType.startswith("/type/"):
        stName = propertyDataType
    schemaTypes.append(stName)

  # Now record the data so future runs can benefit
  for path, columns in ((schemaNamesFile, schemaNames),
                        (schemaTypesFile, schemaTypes)):
    with open(path, "w") as f:
      f.write("\t".join(columns))

  # All done
  return schemaNames, schemaTypes
Esempio n. 4
0
def obtainTypeLabels(credentials=None, maxLabels=10):
  """Return ids of Freebase types, minus those under a few fixed prefixes.

  Queries objects of type ``/type/type`` via ``fb.readBatch`` and takes the
  first ``id`` value of each result.  Any id that starts with ``/type``,
  ``/freebase``, ``/measurement_unit`` or ``/common`` is left out.

  Args:
    credentials: Forwarded untouched to ``fb.readBatch``.
    maxLabels: Given to ``fb.readBatch`` as ``maxResults``; the query's own
      ``limit`` is ``min(100, maxLabels)``.  Because the prefix filter runs
      after the fetch, the result may hold fewer than ``maxLabels`` ids.

  Returns:
    list: Remaining type ids in arrival order.  A concrete list on both
    Python 2 and Python 3 (chained ``filter`` calls are lazy on Python 3).
  """
  skippedPrefixes = ("/type", "/freebase", "/measurement_unit", "/common")

  # NOTE(review): the nested key/namespace clause presumably limits matches
  # to types directly under a top-level domain -- confirm against MQL docs.
  query = [{ 'type': '/type/type',
             'key': [{'namespace': [{'key': [{'namespace':'/'}]}]}],
             'id': [{}],
             'name': [{}],
             'limit': min(100, maxLabels)}]

  #
  # Ask Freebase for the type ids
  #
  typeLabels = []
  def typeLabelBuilder(resultList):
    for result in resultList:
      typeLabels.append(result["id"][0]["value"])
  fb.readBatch(query, typeLabelBuilder, credentials, maxResults=maxLabels)

  # A tuple argument lets startswith test every prefix in a single pass.
  return [typeId for typeId in typeLabels
          if not typeId.startswith(skippedPrefixes)]
Esempio n. 5
0
def _asAsciiText(value):
    """Return ``str(value)``, or "" if value is None or cannot be converted."""
    if value is None:
        return ""
    try:
        return str(value)
    except UnicodeEncodeError:
        # Raised on Python 2 for unicode text outside ASCII; such values
        # are recorded as empty rather than aborting the whole dump.
        return ""


def obtainDatabase(typeLabel, credentials=None):
    """Fetch the schema and a sample of instances for one Freebase type.

    Two rounds of ``fb.readBatch`` are made: the first asks for the
    ``/type/type/properties`` of ``typeLabel``; the second asks for
    instances of that type with every discovered property requested.

    Each instance becomes one tuple of strings aligned with ``schema``: the
    instance name first, then one cell per property.  A property cell holds
    the ``name`` of the property's first value if that value has a "name"
    key, else its ``value``.  Missing values, ``None`` and values whose
    ``str()`` conversion raises ``UnicodeEncodeError`` all become "".

    Args:
        typeLabel: Id of the type to inspect.
        credentials: Handed unchanged to ``fb.readBatch``.

    Returns:
        tuple: ``(typeLabel, schema, tuples)``.  ``schema`` is ``["name"]``
        followed by the property ids, or ``[]`` when no type profile was
        returned; ``tuples`` is the list of row tuples.
    """
    #
    # Using a MQL query, grab the schema for the given typeLabel
    #
    typeDetailQuery = [{
        'type': '/type/type',
        'id': typeLabel,
        'name': [],
        '/type/type/properties': [{
            'id': None,
            'type': [],
            'name': None,
            'expected_type': None
        }],
        'guid': None,
        'limit': 1
    }]

    #
    # Ask Freebase for metadata about the indicated type
    #
    typeProfiles = []
    fb.readBatch(typeDetailQuery,
                 typeProfiles.extend,
                 credentials,
                 maxResults=1)

    #
    # Iterate through all discovered types (at most 1, given the slice)
    #
    schema = []
    tuples = []
    for discoveredType in typeProfiles[0:1]:
        properties = discoveredType["/type/type/properties"]

        #
        # Create a query based on the type-specific profile and
        # grab the type-appropriate data
        #
        # NOTE(review): 'limit' is 1 here although maxResults below is 10;
        # whether readBatch pages past the limit is not visible from this
        # block -- confirm the intended sample size.
        typeSpecificQuery = {
            'type': typeLabel,
            'name': None,
            'id': None,
            'guid': None,
            'limit': 1
        }

        schema = ["name"]
        for discoveredProperty in properties:
            schema.append(discoveredProperty["id"])
            typeSpecificQuery[discoveredProperty["id"]] = [{}]
            # Single-argument form prints identically on Python 2 and 3.
            print("DP %s" % (discoveredProperty,))

        #
        # Send the query to Freebase
        #
        typeInstances = []
        fb.readBatch([typeSpecificQuery],
                     typeInstances.extend,
                     credentials,
                     maxResults=10)

        #
        # Dump the sample data for this type.  Equivalent to a relation.
        #
        for elt in typeInstances:
            # A None name yields "" here, consistent with property cells
            # (it used to be stored as the literal text "None").
            newTuple = [_asAsciiText(elt["name"])]

            for p in properties:
                valueList = elt[p["id"]]
                if not valueList:
                    newTuple.append("")
                    continue
                firstValue = valueList[0]
                cellKey = "name" if "name" in firstValue else "value"
                newTuple.append(_asAsciiText(firstValue[cellKey]))
            tuples.append(tuple(newTuple))

    #
    # Return schema info and the sample results
    #
    return typeLabel, schema, tuples
Esempio n. 6
0
def _asAsciiText(value):
  """Return ``str(value)``, or "" if value is None or cannot be converted."""
  if value is None:
    return ""
  try:
    return str(value)
  except UnicodeEncodeError:
    # Raised on Python 2 for unicode text outside ASCII; such values are
    # recorded as empty rather than aborting the whole dump.
    return ""


def obtainDatabase(typeLabel, credentials=None):
  """Retrieve the property schema and sample rows for a Freebase type.

  The first ``fb.readBatch`` call requests the ``/type/type/properties`` of
  ``typeLabel``.  The second requests instances of the type, asking for
  every property found by the first call.

  Every instance is flattened into a tuple of strings parallel to
  ``schema``: its name, then one cell per property.  For a property the
  first value is used, taking its ``name`` when it has a "name" key and its
  ``value`` otherwise.  Absent values, ``None`` and anything for which
  ``str()`` raises ``UnicodeEncodeError`` are stored as "".

  Args:
    typeLabel: Id of the type to inspect.
    credentials: Forwarded untouched to ``fb.readBatch``.

  Returns:
    tuple: ``(typeLabel, schema, tuples)``.  ``schema`` is ``["name"]`` plus
    the property ids, or ``[]`` if no type profile came back; ``tuples`` is
    the list of row tuples.
  """
  #
  # Using a MQL query, grab the schema for the given typeLabel
  #
  typeDetailQuery = [{ 'type': '/type/type',
                       'id': typeLabel,
                       'name': [],
                       '/type/type/properties': [{'id': None, 'type': [], 'name': None, 'expected_type': None}],
                       'guid': None,
                       'limit': 1}]

  #
  # Ask Freebase for metadata about the indicated type
  #
  typeProfiles = []
  fb.readBatch(typeDetailQuery, typeProfiles.extend, credentials, maxResults=1)

  #
  # Iterate through all discovered types (at most 1, given the slice)
  #
  schema = []
  tuples = []
  for discoveredType in typeProfiles[0:1]:
    properties = discoveredType["/type/type/properties"]

    #
    # Create a query based on the type-specific profile and
    # grab the type-appropriate data
    #
    # NOTE(review): 'limit' is 1 here although maxResults below is 10;
    # whether readBatch pages past the limit is not visible from this
    # block -- confirm the intended sample size.
    typeSpecificQuery = { 'type': typeLabel,
                          'name': None,
                          'id': None,
                          'guid': None,
                          'limit': 1}

    schema = ["name"]
    for discoveredProperty in properties:
      schema.append(discoveredProperty["id"])
      typeSpecificQuery[discoveredProperty["id"]] = [{}]
      # Single-argument form prints identically on Python 2 and 3.
      print("DP %s" % (discoveredProperty,))

    #
    # Send the query to Freebase
    #
    typeInstances = []
    fb.readBatch([typeSpecificQuery], typeInstances.extend, credentials, maxResults=10)

    #
    # Dump the sample data for this type.  Equivalent to a relation.
    #
    for elt in typeInstances:
      # A None name yields "" here, consistent with property cells (it used
      # to be stored as the literal text "None").
      newTuple = [_asAsciiText(elt["name"])]

      for p in properties:
        valueList = elt[p["id"]]
        if valueList:
          head = valueList[0]
          if "name" in head:
            newTuple.append(_asAsciiText(head["name"]))
          else:
            newTuple.append(_asAsciiText(head["value"]))
        else:
          newTuple.append("")
      tuples.append(tuple(newTuple))

  #
  # Return schema info and the sample results
  #
  return typeLabel, schema, tuples