Esempio n. 1
0
    def process_item(self, item, spider):
        """Write the triples describing one scraped question to ``self.file``.

        The signature matches a Scrapy item-pipeline hook (presumably that is
        how it is called -- confirm against the enclosing class).

        ``item`` must provide:

        * ``keyid`` / ``nationid`` -- identifiers used to build the URIs;
          ``str(nationid)`` must be a key of the module-level ``countries``.
        * ``label`` -- the question label.
        * ``answers`` -- indexable answer labels; index 0 is skipped, so that
          ``answers[v]`` lines up with column ``v`` of each data row.
        * ``data`` -- rows whose first cell is a ``"<month>/<year>"`` string
          and whose remaining cells are matched against a ``"<number>%"``
          pattern; cells that do not match are ignored.

        Output is written once per data row.  The question-level triples are
        included in the first write; if ``data`` is empty they are written on
        their own after the loop (previously they were silently dropped in
        that case).

        Returns ``item`` unchanged.

        Raises ``KeyError`` for a missing item field or unknown country, and
        ``ValueError`` if a row date is not ``month/year`` with integer parts
        or a matched percentage is not parseable by ``float``.
        """
        keyid = item['keyid']
        nationid = item['nationid']
        answers = item['answers']
        label = item['label']
        data = item['data']

        dataset_uri = "%s/dataset" % (BASE_URI)
        question_uri = "%s/question/%s" % (BASE_URI, keyid)

        # Serialized triples waiting to be written; joined at flush time
        # instead of growing one string with repeated ``+=``.
        pending = [triple(question_uri, RDFS['label'], label)]
        for v in range(1, len(answers)):
            measure = EB['q%sa%s' % (keyid, v)]
            pending.append(triple(measure, RDF['type'], QB['MeasureProperty']))
            pending.append(triple(measure, RDFS['label'], "%s (%%)" % answers[v]))

        country = countries[str(nationid)]
        country_uri = country['uri']

        # Raw string: ``\s`` in a plain literal is an invalid string escape.
        percent_re = re.compile(r"^\s*(.+)%")

        # Depends only on the item, not on the row, so build it once.
        survey_slice_uri = "%s/slice/%s/%s" % (BASE_URI, nationid, keyid)

        for r in data:
            (month, year) = r[0].split('/')
            observation_uri = "%s/observation/%s/%s/%s/%s" % (BASE_URI, nationid, keyid, year, month)
            survey_uri = "%s/survey/%s/%s" % (BASE_URI, year, month)
            question_slice_uri = "%s/slice/%s/-/%s/%s" % (BASE_URI, nationid, year, month)
            country_slice_uri = "%s/slice/-/%s/%s/%s" % (BASE_URI, keyid, year, month)

            # The observation itself and its dimensions.
            pending.append(triple(observation_uri, RDF['type'], QB['Observation']))
            pending.append(triple(observation_uri, RDFS['label'], 'All results from %s for "%s" in survey %s.%s' % (country['label'], label, month, year)))
            pending.append(triple(observation_uri, QB['dataSet'], dataset_uri))
            pending.append(triple(observation_uri, EB['survey'], survey_uri))
            pending.append(triple(observation_uri, EB['surveyMonth'], int(month)))
            pending.append(triple(observation_uri, EB['surveyYear'], int(year)))
            pending.append(triple(observation_uri, EB['question'], question_uri))
            pending.append(triple(observation_uri, EB['country'], country_uri))

            # One measure value per answer column that holds a percentage.
            for v in range(1, len(r)):
                m = percent_re.search(r[v])
                if m:
                    pending.append(triple(observation_uri, EB['q%sa%s' % (keyid, v)], float(m.group(1))))

            # Slice fixed on country + question.  Its descriptive triples are
            # re-emitted for every row, as before, to keep the output stable.
            pending.append(triple(survey_slice_uri, RDFS['label'], 'All results from %s for "%s"' % (country['label'], label)))
            pending.append(triple(survey_slice_uri, QB['observation'], observation_uri))
            pending.append(triple(survey_slice_uri, EB['question'], question_uri))
            pending.append(triple(survey_slice_uri, EB['country'], country_uri))

            # Slice fixed on country + survey.
            pending.append(triple(question_slice_uri, RDFS['label'], "All results from %s in survey %s.%s" % (country['label'], month, year)))
            pending.append(triple(question_slice_uri, QB['observation'], observation_uri))
            pending.append(triple(question_slice_uri, EB['survey'], survey_uri))
            pending.append(triple(question_slice_uri, EB['country'], country_uri))

            # Slice fixed on question + survey.
            pending.append(triple(country_slice_uri, RDFS['label'], 'All results for "%s" in survey %s.%s' % (label, month, year)))
            pending.append(triple(country_slice_uri, QB['observation'], observation_uri))
            pending.append(triple(country_slice_uri, EB['survey'], survey_uri))
            pending.append(triple(country_slice_uri, EB['question'], question_uri))

            pending.append(triple(dataset_uri, QB['slice'], survey_slice_uri))
            pending.append(triple(dataset_uri, QB['slice'], question_slice_uri))
            pending.append(triple(dataset_uri, QB['slice'], country_slice_uri))

            # Flush once per row so the buffer stays small.
            self.file.write(''.join(pending))
            pending = []

        # Only non-empty when ``data`` had no rows: without this flush the
        # question-level triples built above would never reach the file.
        if pending:
            self.file.write(''.join(pending))
        return item
Esempio n. 2
0
# Leading "#" header lines: the resource count, then the weights if any were given.
fout.write("# Found %d distinct resources\n" % len(resources))
if opts.weights:
    fout.write("# Weights: %s\n" % ", ".join(opts.weights))


# Never ask for more matches than len(resources) - 1
# (presumably "every resource except the one being compared" -- confirm).
nummatches = min(opts.nummatches, len(resources) - 1)

# Standard deviation over every entry of ``distances``.
std = numpy.std(distances)

for r in resources:
    index = resources[r]
    try:
        node = opts.uripattern % hashlib.md5(r).hexdigest()
        fout.write(triple(r, "http://vocab.org/terms/similarThings", node))
        fout.write(triple(node, "a", "http://www.w3.org/1999/02/22-rdf-syntax-ns#Seq"))
        score_base = distances[index][sorted_distance_args[index][1]]
        for i in range(1, nummatches + 1):
            score = distances[index][sorted_distance_args[index][i]]
            if score > score_base + std:
                break
            #    print "match %s has score %s (%s)" % (i, distances[index][sorted_distance_args[index][i]] , resource_index[sorted_distance_args[index][i]])
            try:
                fout.write(
                    triple(
                        node,
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#_%d" % (i),
                        resource_index[sorted_distance_args[index][i]],
                    )
                )