def geocode_acc(results):
    """Append a geocoding accuracy report for ``results`` to ``10_acc.txt``.

    Only affiliations that already carry manually determined coordinates
    (a ``'lat'`` key) are evaluated.  Each one is formatted with
    ``format_address_10``, geocoded with ``geocode.get_location`` and the
    distance between the geocoded point and the manual coordinates is
    logged.  A result closer than 5 (presumably kilometres, going by the
    distance helper's name) counts as a success.

    Args:
        results: Iterable of paper records, each indexable as
            ``paper['MedlineCitation']['PMID']`` and
            ``paper['MedlineCitation']['Article']['AuthorList']``, where
            every author has an ``'AffiliationInfo'`` list of mappings with
            an ``'Affiliation'`` string and, optionally, ``'lat'``/``'lng'``.

    The report is appended to ``./tests/resources/10_acc.txt``.  Nothing is
    returned.
    """
    report_path = os.path.join('./tests/resources', '10_acc.txt')
    with open(report_path, 'a') as datafile:
        input_num = 0   # addresses for which the geocoder returned a place
        output_num = 0  # of those, places closer than 5 to the target
        distance_list = []

        for paper in results:
            citation = paper['MedlineCitation']
            datafile.write('\nPMID: ' + str(citation['PMID']) + '\n')

            for author in citation['Article']['AuthorList']:
                for affiliation in author['AffiliationInfo']:
                    # Only affiliations with manually found coordinates
                    # have a target to measure against.
                    if 'lat' not in affiliation:
                        continue

                    # change format_address
                    formatted_address = format_address_10(affiliation['Affiliation'])
                    if not formatted_address:
                        continue

                    datafile.write('\tInput address: ' + affiliation['Affiliation'] + '\n')
                    place = geocode.get_location(formatted_address)
                    datafile.write('\t\tFormatted address: ' + formatted_address + '\n')

                    if not place:
                        datafile.write('\t\tOutput address: NONE' + '\n')
                        continue

                    input_num += 1
                    found = place['geometry']['location']
                    datafile.write('\tOutput address: ' + place['name'] + '\n')
                    datafile.write('\tResult coordinates: ' + str(found) + '\n')

                    distance = getDistanceFromLatLonInKm(
                        found['lat'], found['lng'],
                        affiliation['lat'], affiliation['lng'])
                    distance_list.append(distance)
                    if distance < 5:
                        output_num += 1
                    datafile.write('\t\tDistance from target: ' + str(distance) + ' km\n')

        datafile.write(str(distance_list) + '\n\n')

        # statistics.mean() needs at least one value and statistics.stdev()
        # at least two; both raise StatisticsError otherwise, which would
        # abort the report before the success rate is written.
        if distance_list:
            datafile.write('\nMEAN DISTANCE: ' + str(statistics.mean(distance_list)) + ' km\n')
        else:
            datafile.write('\nMEAN DISTANCE: N/A\n')
        if len(distance_list) >= 2:
            datafile.write('\n1 STANDARD DEVIATION: ' + str(statistics.stdev(distance_list)) + ' km\n')
        else:
            datafile.write('\n1 STANDARD DEVIATION: N/A\n')

        if input_num > 0:
            datafile.write('\nSUCCESS RATE: ' + str(output_num / input_num * 100) + '%\n')
    # The ``with`` block closes the file; no explicit close() is needed.
def geocode_hit(results):
    """Append a geocoding hit-rate report for ``results`` to ``10_hit.txt``.

    Every affiliation string is split on ``';'`` into individual addresses.
    Each address is formatted with ``format_address_10`` and, unless an
    address with the same alphanumeric content was already processed,
    geocoded with ``geocode.get_location``.  The share of addresses for
    which the geocoder returned a place is written as the success rate.

    Args:
        results: Iterable of paper records, each indexable as
            ``paper['MedlineCitation']['PMID']`` and
            ``paper['MedlineCitation']['Article']['AuthorList']``, where
            every author has an ``'AffiliationInfo'`` list of mappings with
            an ``'Affiliation'`` string.

    The report is appended to ``./tests/resources/10_hit.txt``.  Nothing is
    returned.
    """
    report_path = os.path.join('./tests/resources', '10_hit.txt')
    with open(report_path, 'a') as datafile:
        input_num = 0   # distinct addresses sent to the geocoder
        output_num = 0  # addresses for which a place came back
        alphanumeric_addresses = set()  # do not analyse duplicate addresses

        for paper in results:
            citation = paper['MedlineCitation']
            datafile.write('\nPMID: ' + str(citation['PMID']) + '\n')

            for author in citation['Article']['AuthorList']:
                for affiliation in author['AffiliationInfo']:
                    for raw_address in affiliation['Affiliation'].split(';'):
                        # change format_address
                        formatted_address = format_address_10(raw_address)
                        # Check for a falsy result *before* the regex so a
                        # non-string such as None is skipped instead of
                        # raising TypeError inside re.sub().
                        if not formatted_address:
                            continue

                        # Case-insensitive key made of word characters only,
                        # used to detect duplicates.
                        alphanumeric = re.sub(r'\W', '', formatted_address).upper()
                        if alphanumeric == '' or alphanumeric in alphanumeric_addresses:
                            continue
                        alphanumeric_addresses.add(alphanumeric)

                        datafile.write('\tInput address: ' + raw_address + '\n')
                        input_num += 1
                        location = geocode.get_location(formatted_address)
                        datafile.write('\tFormatted address: ' + formatted_address + '\n')

                        if location:
                            datafile.write('\tOutput address: ' + location['name'] + '\n')
                            output_num += 1
                        else:
                            datafile.write('\tOutput address: NONE' + '\n')

        if input_num > 0:
            datafile.write('\nSUCCESS RATE: ' + str(output_num / input_num * 100) + '%\n')
    # The ``with`` block closes the file; no explicit close() is needed.
def doublecheck():
    """Print what ``geocode.get_location`` returns for the query ``TN, USA``."""
    print(geocode.get_location('TN, USA'))