Esempio n. 1
0
        else:
            unmatched_art.append(art)
    elif not 'title' in art:
        null_art_indexes.append(index)

# Drop the null entries collected above. The indexes are visited from last to
# first so that each pop leaves the positions still to be removed untouched.
# NOTE(review): this relies on null_art_indexes having been filled in
# ascending order -- confirm against the loop that builds it.
null_art_indexes.reverse()
for index in null_art_indexes:
    playa_json.pop(index)

# Serialise before opening the output so a failure while dumping does not
# truncate a previously written result file.
json_content = json.dumps(unmatched_art, sort_keys=True, indent=4)
# Remove leading/trailing characters that belong to the UTF-8 BOM sequence.
json_stripped = json_content.strip(codecs.BOM_UTF8)
json_stripped_cleaned = convert_html_entities(json_stripped)
# The context manager closes (and therefore flushes) the file even if the
# write raises, instead of leaving the handle open until interpreter exit.
with codecs.open('./results/unmatched_art.json', 'w',
                 "utf-8") as unmatched_art_file:
    unmatched_art_file.write(json_stripped_cleaned)

# Same pipeline for the full art list (null entries already removed).
json_content = json.dumps(playa_json, sort_keys=True, indent=4)
json_stripped = json_content.strip(codecs.BOM_UTF8)
json_stripped_cleaned = convert_html_entities(json_stripped)
with codecs.open('./results/art_data_and_locations.json', 'w',
                 "utf-8") as result_file:
    result_file.write(json_stripped_cleaned)

# Only report when at least one art piece is left without a location.
# A single parenthesised argument prints identically on Python 2 and 3.
if unmatched_art:
    print("Location not determined for " + str(len(unmatched_art)) +
          " art pieces")
        #print "Best match for " + event['name'] + " : " + max_match_location['name'] + " (confidence: " + str(max_match) + ")"
        if max_match > MATCH_THRESHOLD:
            # Match found
            if 'latitude' in max_match_location and max_match_location['latitude'] != "":
                event['latitude'] = max_match_location['latitude']
                event['longitude'] = max_match_location['longitude']
            #event['location'] = max_match_location['location']
            event['matched_name'] = max_match_location['name']
            matched_events.append(event)
        else:
            unmatched_events.append(event)
    elif not 'hosted_by_camp' in event:
        null_event_indexes.append(index)

# Drop the null entries collected above. The indexes are visited from last to
# first so that each pop leaves the positions still to be removed untouched.
# NOTE(review): this relies on null_event_indexes having been filled in
# ascending order -- confirm against the loop that builds it.
null_event_indexes.reverse()
for index in null_event_indexes:
    events_json.pop(index)

# Serialise before opening the output so a failure while dumping does not
# truncate a previously written result file.
unmatched_events_content = convert_html_entities(
    json.dumps(unmatched_events, sort_keys=True, indent=4))
# The context manager closes (and therefore flushes) the file even if the
# write raises, instead of leaving the handle open until interpreter exit.
with open('./results/unmatched_events.json', 'wb') as unmatched_events_file:
    unmatched_events_file.write(unmatched_events_content)

# Same pipeline for the full event list (null entries already removed).
events_content = convert_html_entities(
    json.dumps(events_json, sort_keys=True, indent=4))
with open('./results/event_data_and_locations.json', 'wb') as result_file:
    result_file.write(events_content)

# Only report unmatched events when there are any; the matched count is
# always printed. A single parenthesised argument prints identically on
# Python 2 and 3.
if unmatched_events:
    print("Matches not found for " + str(len(unmatched_events)) + " events")

print("Matched events: " + str(len(matched_events)))
          print "Best match for " + camp['name'] + " : " + max_match_location['name'] + " (confidence: " + str(max_match) + ")"
          unmatched_camps.append(camp)
          
    else:
        null_camp_indexes.append(index)

# Drop the null entries collected above. The indexes are visited from last to
# first so that each pop leaves the positions still to be removed untouched.
# NOTE(review): this relies on null_camp_indexes having been filled in
# ascending order -- confirm against the loop that builds it.
null_camp_indexes.reverse()
for index in null_camp_indexes:
    playa_json.pop(index)

# Serialise before opening the output so a failure while dumping does not
# truncate a previously written result file.
json_content = json.dumps(unmatched_camps, sort_keys=True, indent=4)
# Remove leading/trailing characters that belong to the UTF-8 BOM sequence.
json_stripped = json_content.strip(codecs.BOM_UTF8)
json_stripped_cleaned = convert_html_entities(json_stripped)
# The context manager closes (and therefore flushes) the file even if the
# write raises, instead of leaving the handle open until interpreter exit.
with codecs.open('./results/unmatched_camps.json', 'w',
                 "utf-8") as unmatched_camps_file:
    unmatched_camps_file.write(json_stripped_cleaned)

# Same pipeline for the full camp list (null entries already removed).
json_content = json.dumps(playa_json, sort_keys=True, indent=4)
json_stripped = json_content.strip(codecs.BOM_UTF8)
json_stripped_cleaned = convert_html_entities(json_stripped)
with codecs.open('./results/camp_data_and_locations.json', 'w',
                 "utf-8") as result_file:
    result_file.write(json_stripped_cleaned)

# Only report camps without a location when there are any; the matched count
# is always printed. A single parenthesised argument prints identically on
# Python 2 and 3.
if unmatched_camps:
    print("Location not determined for " + str(len(unmatched_camps)) +
          " camps")

print("Matched camps: " + str(len(matched_camps)))
                    'latitude'] != "":
                event['latitude'] = max_match_location['latitude']
                event['longitude'] = max_match_location['longitude']
            #event['location'] = max_match_location['location']
            event['matched_name'] = max_match_location['name']
            matched_events.append(event)
        else:
            unmatched_events.append(event)
    elif not 'hosted_by_camp' in event:
        null_event_indexes.append(index)

# Drop the null entries collected above. The indexes are visited from last to
# first so that each pop leaves the positions still to be removed untouched.
# NOTE(review): this relies on null_event_indexes having been filled in
# ascending order -- confirm against the loop that builds it.
null_event_indexes.reverse()
for index in null_event_indexes:
    events_json.pop(index)

# Serialise before opening the output so a failure while dumping does not
# truncate a previously written result file.
unmatched_events_content = convert_html_entities(
    json.dumps(unmatched_events, sort_keys=True, indent=4))
# The context manager closes (and therefore flushes) the file even if the
# write raises, instead of leaving the handle open until interpreter exit.
with open('./results/unmatched_events.json', 'wb') as unmatched_events_file:
    unmatched_events_file.write(unmatched_events_content)

# Same pipeline for the full event list (null entries already removed).
events_content = convert_html_entities(
    json.dumps(events_json, sort_keys=True, indent=4))
with open('./results/event_data_and_locations.json', 'wb') as result_file:
    result_file.write(events_content)

# Only report unmatched events when there are any; the matched count is
# always printed. A single parenthesised argument prints identically on
# Python 2 and 3.
if unmatched_events:
    print("Matches not found for " + str(len(unmatched_events)) + " events")

print("Matched events: " + str(len(matched_events)))