Ejemplo n.º 1
0
def geocode_acc(results):
    """Append a geocoding accuracy report to ``./tests/resources/10_acc.txt``.

    Only affiliations that carry a ``'lat'`` key (reference coordinates) are
    examined. Their text is passed through ``format_address_10`` and the
    result is looked up with ``geocode.get_location``. When a place comes
    back, its distance to the reference ``lat``/``lng`` is computed with
    ``getDistanceFromLatLonInKm``, logged and collected. A lookup counts as a
    success when that distance is below 5.

    After all papers are processed the collected distances are written,
    followed by their mean (needs at least one distance), their standard
    deviation (needs at least two) and the success rate (needs at least one
    geocoded affiliation). Statistics that cannot be computed are omitted
    instead of raising ``statistics.StatisticsError``.

    Args:
        results: Iterable of paper records, each indexed as
            ``paper['MedlineCitation']['PMID']`` and
            ``paper['MedlineCitation']['Article']['AuthorList']``. Every
            author must provide an ``'AffiliationInfo'`` iterable of dicts
            with an ``'Affiliation'`` string and, optionally, ``'lat'`` and
            ``'lng'``.

    Returns:
        None. The report is appended to the file.
    """
    report_path = os.path.join('./tests/resources', '10_acc.txt')

    # The with-statement closes the file; no explicit close() is required.
    with open(report_path, 'a') as datafile:
        input_num = 0   # affiliations for which the geocoder returned a place
        output_num = 0  # of those, results closer than 5 to the reference
        distance_list = []

        for paper in results:
            pmid = str(paper['MedlineCitation']['PMID'])
            datafile.write('\nPMID: ' + pmid + '\n')

            author_list = paper['MedlineCitation']['Article']['AuthorList']

            for author in author_list:
                for a in author['AffiliationInfo']:
                    # Skip affiliations without reference coordinates.
                    if 'lat' not in a:
                        continue

                    formatted_address = format_address_10(a['Affiliation'])
                    if not formatted_address:
                        continue

                    datafile.write('\tInput address: ' + a['Affiliation'] + '\n')
                    place = geocode.get_location(formatted_address)
                    datafile.write('\t\tFormatted address: ' + formatted_address + '\n')

                    if not place:
                        datafile.write('\t\tOutput address: NONE' + '\n')
                        continue

                    input_num += 1
                    location = place['geometry']['location']
                    datafile.write('\tOutput address: ' + place['name'] + '\n')
                    datafile.write('\tResult coordinates: ' + str(location) + '\n')
                    distance = getDistanceFromLatLonInKm(
                        location['lat'],
                        location['lng'],
                        a['lat'],
                        a['lng'])
                    distance_list.append(distance)

                    if distance < 5:
                        output_num += 1

                    datafile.write('\t\tDistance from target: ' + str(distance) + ' km\n')

        datafile.write(str(distance_list) + '\n\n')

        # statistics.mean() needs at least one data point.
        if distance_list:
            datafile.write('\nMEAN DISTANCE: ' + str(statistics.mean(distance_list)) + ' km\n')

        # statistics.stdev() needs at least two data points.
        if len(distance_list) > 1:
            datafile.write('\n1 STANDARD DEVIATION: ' + str(statistics.stdev(distance_list)) + ' km\n')

        if input_num > 0:
            datafile.write('\nSUCCESS RATE: ' + str(output_num / input_num * 100) + '%\n')
Ejemplo n.º 2
0
def geocode_hit(results):
    """Append a geocoding hit-rate report to ``./tests/resources/10_hit.txt``.

    Every affiliation string is split on ``';'`` and each piece is passed
    through ``format_address_10``. Pieces whose formatted form is empty, or
    whose upper-cased alphanumeric form was already seen, are skipped so that
    duplicate addresses are analysed only once. Each remaining address is
    looked up with ``geocode.get_location``; a truthy result counts as a hit.
    The success rate is written when at least one address was looked up.

    Args:
        results: Iterable of paper records, each indexed as
            ``paper['MedlineCitation']['PMID']`` and
            ``paper['MedlineCitation']['Article']['AuthorList']``. Every
            author must provide an ``'AffiliationInfo'`` iterable of dicts
            with an ``'Affiliation'`` string.

    Returns:
        None. The report is appended to the file.
    """
    report_path = os.path.join('./tests/resources', '10_hit.txt')
    non_word = re.compile(r'\W')  # raw string: '\W' is not a valid str escape

    # The with-statement closes the file; no explicit close() is required.
    with open(report_path, 'a') as datafile:
        input_num = 0   # distinct addresses sent to the geocoder
        output_num = 0  # addresses for which the geocoder returned a place
        alphanumeric_addresses = set()  # do not analyse duplicate addresses

        for paper in results:
            pmid = str(paper['MedlineCitation']['PMID'])
            datafile.write('\nPMID: ' + pmid + '\n')

            author_list = paper['MedlineCitation']['Article']['AuthorList']

            for author in author_list:
                for affiliation in author['AffiliationInfo']:
                    for raw_address in affiliation['Affiliation'].split(';'):
                        formatted_address = format_address_10(raw_address)

                        # Check before the regex: a falsy result (presumably
                        # '' or None -- confirm against format_address_10)
                        # must be skipped, not handed to re.
                        if not formatted_address:
                            continue

                        alphanumeric = non_word.sub('', formatted_address).upper()
                        if not alphanumeric or alphanumeric in alphanumeric_addresses:
                            continue

                        alphanumeric_addresses.add(alphanumeric)
                        datafile.write('\tInput address: ' + raw_address + '\n')
                        input_num += 1
                        place = geocode.get_location(formatted_address)
                        datafile.write('\tFormatted address: ' + formatted_address + '\n')

                        if place:
                            datafile.write('\tOutput address: ' + place['name'] + '\n')
                            output_num += 1
                        else:
                            datafile.write('\tOutput address: NONE' + '\n')

        if input_num > 0:
            datafile.write('\nSUCCESS RATE: ' + str(output_num / input_num * 100) + '%\n')
Ejemplo n.º 3
0
def doublecheck():
    """Print whatever ``geocode.get_location`` returns for ``TN, USA``."""
    query = 'TN, USA'
    location = geocode.get_location(query)
    print(location)