Ejemplo n.º 1
0
def get_focal_point(location_list, r1):
    """Find the location that has the most neighbours within radius ``r1``.

    For every location, collect all locations of ``location_list`` whose
    ``formulas.haversine`` distance to it is at most ``r1`` (the location
    itself is included whenever its distance to itself can be computed).
    The location with the largest neighbourhood is the focal point.

    Args:
        location_list: Iterable of objects exposing ``get_lat()`` and
            ``get_lng()``.
        r1: Radius, given as anything ``float()`` accepts. It is compared
            directly against the value returned by ``formulas.haversine``.

    Returns:
        A tuple ``(focal_point, neighbours)`` where ``neighbours`` is a list
        of locations. ``(None, [])`` is returned when the input is empty or
        no neighbourhood contains any location.

    Raises:
        ValueError, TypeError: If ``r1`` cannot be converted to ``float``.
    """
    # Convert once up front: the radius is loop-invariant, and an invalid
    # radius should fail loudly rather than look like "no neighbours".
    radius = float(r1)

    # Read every coordinate once instead of once per compared pair.
    points = [(loc, loc.get_lat(), loc.get_lng()) for loc in location_list]

    focal_point = None
    local_list = []
    for location, lat1, lon1 in points:
        neighbours = []
        for other_location, lat2, lon2 in points:
            try:
                if formulas.haversine(lat1, lon1, lat2, lon2) <= radius:
                    neighbours.append(other_location)
            except Exception:
                # Best effort: a pair whose distance cannot be computed is
                # simply not counted. Unlike a bare ``except`` this still
                # lets KeyboardInterrupt / SystemExit stop the O(n^2) scan.
                continue

        # Strict ">" keeps the earliest location when sizes tie.
        if len(neighbours) > len(local_list):
            local_list = neighbours
            focal_point = location

    return (focal_point, local_list)
Ejemplo n.º 2
0
def direct_flight_builder(loc_a, coord_a, radius_a, loc_b, coord_b, radius_b):
    """Append one CSV row of monthly direct-flight counts between two places.

    Nothing is written when the two coordinates are closer than
    ``MINIMUM_DISTANCE`` according to ``formulas.haversine``. Otherwise the
    airports around each coordinate are looked up through the module-level
    ``organized_set`` and, for every month from January 1990 to August 2018,
    the number of (airport_a, airport_b) pairs for which
    ``airport_a.has_flight(date, airport_b)`` is true is recorded.

    The row appended to ``outputs/flights.csv`` is: ``loc_a``, ``loc_b``,
    both radii, the "; "-joined IATA codes around each location, the
    "; "-joined ``A-B`` routes seen in any month, both coordinates, then one
    count per month.

    Args:
        loc_a, loc_b: Labels written to the first two columns.
        coord_a, coord_b: ``(lat, lng)`` pairs.
        radius_a, radius_b: Search radii passed to
            ``organized_set.get_subset``.

    Returns:
        None.
    """
    (lat1, lon1) = coord_a
    (lat2, lon2) = coord_b

    # Only do this calculation if the two locations are more than N km apart
    # (assumes formulas.haversine returns kilometres - TODO confirm).
    MINIMUM_DISTANCE = 100
    if formulas.haversine(lat1, lon1, lat2, lon2) < MINIMUM_DISTANCE:
        return

    # The flight data ends in August 2018.
    LAST_YEAR = 2018
    LAST_MONTH = 8

    # prepare the csv output
    with open('outputs/flights.csv', 'a', newline="\n",
              encoding='utf-8-sig') as out_file:
        # airports within the radius of each location
        loc_a_airports = organized_set.get_subset(coord_a, radius_a)
        loc_b_airports = organized_set.get_subset(coord_b, radius_b)

        airports_a_list = [str(airport.iata) for airport in loc_a_airports]
        airports_b_list = [str(airport.iata) for airport in loc_b_airports]

        csv_writer = csv.writer(out_file, delimiter=',')

        # every "A-B" route that had a flight in at least one month
        direct_flights_set = set()
        monthly_counts = []

        for year in range(1990, LAST_YEAR + 1):
            for month in range(1, 13):
                # Compare (year, month) as a pair. The lookup key below is an
                # unpadded concatenation (19901 for Jan 1990, 199010 for Oct
                # 1990), so comparing the key itself against 20188 would cut
                # off October-December of every year.
                if (year, month) > (LAST_YEAR, LAST_MONTH):
                    break

                # key format expected by has_flight: year followed by the
                # unpadded month, as an int
                date = int(str(year) + str(month))

                # count the airport pairs with a flight from a to b this month
                num_of_flights = 0
                for airport_a in loc_a_airports:
                    for airport_b in loc_b_airports:
                        if airport_a.has_flight(date, airport_b):
                            num_of_flights += 1
                            direct_flights_set.add(
                                str(airport_a.iata) + "-" +
                                str(airport_b.iata))
                monthly_counts.append(str(num_of_flights))

        row = [
            loc_a, loc_b,
            str(radius_a),
            str(radius_b),
            '; '.join(airports_a_list),
            '; '.join(airports_b_list),
            # sorted so the column is reproducible between runs
            '; '.join(sorted(direct_flights_set)),
            str(coord_a),
            str(coord_b),
        ]
        row.extend(monthly_counts)
        csv_writer.writerow(row)
def create_remote_set(focal_point, location_list, r2):
    """Return the set of locations farther than ``r2`` from ``focal_point``.

    ``focal_point`` and every element of ``location_list`` are indexed as
    ``[0]`` (latitude) and ``[1]`` (longitude); distances come from
    ``formulas.haversine`` and are compared against ``float(r2)``.
    """
    origin_lat = focal_point[0]
    origin_lng = focal_point[1]
    return {
        candidate
        for candidate in location_list
        if formulas.haversine(origin_lat, origin_lng,
                              candidate[0], candidate[1]) > float(r2)
    }
Ejemplo n.º 4
0
def get_focal_point(location_list, r1):
    """Find the location that has the most neighbours within radius ``r1``.

    For every location, collect the set of locations in ``location_list``
    whose ``formulas.haversine`` distance to it is at most ``r1`` (the
    location itself is included whenever its distance to itself can be
    computed). The location with the largest set is the focal point.

    Args:
        location_list: Iterable of hashable, indexable records whose ``[0]``
            is the latitude and ``[1]`` the longitude.
        r1: Radius, given as anything ``float()`` accepts. It is compared
            directly against the value returned by ``formulas.haversine``.

    Returns:
        A tuple ``(focal_point, local_set)``. ``(None, set())`` is returned
        when the input is empty or no neighbourhood contains any location.

    Raises:
        ValueError, TypeError: If ``r1`` cannot be converted to ``float``.
    """
    # Convert once up front: the radius is loop-invariant, and an invalid
    # radius should fail loudly rather than look like "no neighbours".
    radius = float(r1)

    # Read every coordinate once instead of once per compared pair.
    points = [(loc, loc[0], loc[1]) for loc in location_list]

    focal_point = None
    local_set = set()
    for location, lat1, lon1 in points:
        neighbours = set()
        for other_location, lat2, lon2 in points:
            try:
                if formulas.haversine(lat1, lon1, lat2, lon2) <= radius:
                    neighbours.add(other_location)
            except Exception:
                # Best effort: a pair whose distance cannot be computed is
                # simply not counted. Unlike a bare ``except`` this still
                # lets KeyboardInterrupt / SystemExit stop the O(n^2) scan.
                continue

        # Strict ">" keeps the first location encountered when sizes tie.
        if len(neighbours) > len(local_set):
            local_set = neighbours
            focal_point = location

    return (focal_point, local_set)
Ejemplo n.º 5
0
def create_remote_list(focal_point, location_list, r2):
    """Return the locations that lie farther than ``r2`` from ``focal_point``.

    Args:
        focal_point: Object exposing ``get_lat()`` and ``get_lng()``.
        location_list: Iterable of objects exposing the same two getters.
        r2: Radius, given as anything ``float()`` accepts. It is compared
            directly against the value returned by ``formulas.haversine``.

    Returns:
        A list, in input order, of the locations whose distance to the focal
        point is strictly greater than ``r2``. Locations whose distance
        cannot be computed are left out.

    Raises:
        ValueError, TypeError: If ``r2`` cannot be converted to ``float``.
    """
    # Convert once up front: the radius is loop-invariant, and an invalid
    # radius should fail loudly rather than yield an empty result.
    radius = float(r2)

    lat1 = focal_point.get_lat()
    lon1 = focal_point.get_lng()

    remote_list = []
    for loc in location_list:
        lat2 = loc.get_lat()
        lon2 = loc.get_lng()
        try:
            if formulas.haversine(lat1, lon1, lat2, lon2) > radius:
                remote_list.append(loc)
        except Exception:
            # Best effort: skip locations whose distance cannot be computed,
            # without also swallowing KeyboardInterrupt / SystemExit.
            continue
    return remote_list
Ejemplo n.º 6
0
    def checklist(self, busList, threshold, routekey, timestamp):
        """Build a BunchingInstance for every pair of buses closer than ``threshold``.

        Each unordered pair from ``busList`` is measured with ``haversine``
        on the buses' ``latitude`` / ``longitude`` attributes. For pairs
        below the threshold, the ``midpoint`` of the two positions is
        recorded together with ``routekey`` and ``timestamp``.

        Returns the list of instances in pair-iteration order.
        """
        found = []
        for first, second in itertools.combinations(busList, 2):
            gap = haversine(first.latitude, first.longitude,
                            second.latitude, second.longitude)
            if not (gap < threshold):
                continue

            middle = midpoint(first.latitude, first.longitude,
                              second.latitude, second.longitude)
            found.append(
                BunchingInstance(routekey, middle[0], middle[1], timestamp))

        return found
Ejemplo n.º 7
0
 def get_subset(self, coordinates, radius):
     """Return the set of airports within ``radius`` of ``coordinates``.

     Candidate airports are gathered from the grid cell containing the
     coordinates plus every cell touched by the bounding box of the radius;
     each candidate is then kept only if its ``formulas.haversine`` distance
     to the coordinates is at most ``radius``.

     Args:
         coordinates: ``(lat, lng)`` pair.
         radius: Search radius; the bounding-box helpers are noted to assume
             kilometres.

     Returns:
         A new set of airport objects. The cell sets stored in ``self.sets``
         are not modified.

     Raises:
         ValueError: If the latitude is outside [-90, 90] or the longitude
             is outside [-180, 180].
     """
     (lat, lng) = coordinates

     # check that the coordinates exist
     if abs(lat) > 90 or abs(lng) > 180:
         raise ValueError("get_subset: the coordinates are invalid")

     # determine the row and column associated with the coordinates
     (row, column) = self.__get_row_and_column(coordinates)
     key = self.__get_key(row, column)

     # Start from a COPY of the home cell: updating the stored set in place
     # would permanently add neighbouring cells' airports to it.
     close_airports = set(self.sets[key])

     # the formula assumes units in kilometers!
     dlat = formulas.change_in_latitude(radius)

     # check the corners of the radius bounding box
     latHigh = lat + dlat
     latLow = lat - dlat

     dlngHigh = formulas.change_in_longitude(latHigh, radius)
     dlngLow = formulas.change_in_longitude(latLow, radius)

     (row_u_l, column_u_l) = self.__get_row_and_column((latHigh, lng - dlngHigh))
     (row_l_r, column_l_r) = self.__get_row_and_column((latLow, lng + dlngLow))

     # union all airports encompassed by the bounding box - from the upper
     # left corner to the lower right
     drow = row_l_r - row_u_l
     dcolumn = column_l_r - column_u_l

     for i in range(drow + 1):
         for j in range(dcolumn + 1):
             temp_key = self.__get_key(row_u_l + i, column_u_l + j)
             close_airports.update(self.sets[temp_key])

     # the actual set of airports within the radius
     airportSet = set()
     for airport in close_airports:
         airportCoord = airport.coordinates
         dist = formulas.haversine(lat, lng, airportCoord[0], airportCoord[1])
         if dist <= radius:
             airportSet.add(airport)

     return airportSet
Ejemplo n.º 8
0
def percent_coverage_generator(center, big_cluster, percent_coverage):
    """Find the radius around ``center`` that covers a share of the locations.

    All coordinates returned by ``group.get_locations()`` for each group in
    ``big_cluster`` are ordered by their ``formulas.haversine`` distance to
    ``center``; the closest ``ceil(percent_coverage * total)`` of them are
    kept.

    Args:
        center: Object exposing ``get_lat()`` and ``get_lng()``.
        big_cluster: Iterable of objects exposing ``get_locations()``, which
            yields records whose ``[0]`` is the latitude and ``[1]`` the
            longitude.
        percent_coverage: Fraction to cover, given as anything ``float()``
            accepts. Values outside [0, 1] are clamped to "nothing" and
            "everything" respectively.

    Returns:
        A dict with keys ``'radius'`` (distance to the farthest kept
        location, ``0.0`` when nothing is kept), ``'count'`` (number of kept
        locations) and ``'locations'`` (the kept records, nearest first).
    """
    center_lat = center.get_lat()
    center_lng = center.get_lng()

    # list of tuples (distance, coord)
    loc_list_tuples = []
    for group in big_cluster:
        for coord in group.get_locations():
            dist = formulas.haversine(center_lat, center_lng,
                                      coord[0], coord[1])
            loc_list_tuples.append((dist, coord))
    # Sort on the distance only, so the records themselves never have to be
    # comparable.
    loc_list_tuples.sort(key=lambda tup: tup[0])

    wanted = math.ceil(float(percent_coverage) * len(loc_list_tuples))
    # Clamp: without this an empty cluster (or a zero share) indexes [-1],
    # and a share above 1 indexes past the end of the list.
    num_to_include = max(0, min(wanted, len(loc_list_tuples)))

    included = loc_list_tuples[:num_to_include]
    radius_for_percent = included[-1][0] if included else 0.0

    return {
        'radius': radius_for_percent,
        'count': num_to_include,
        'locations': [coord for (_dist, coord) in included]
    }
Ejemplo n.º 9
0
def output_each_patent(ungrouped, patent, r1, r2, company_id, company):
    """Cluster one patent's locations and append one TSV row per cluster.

    The locations are split into a local cluster (``get_focal_point`` with
    ``r1``), remote locations (``create_remote_list`` with ``r2``) and the
    ones in between. Remote locations are then repeatedly clustered with
    ``get_focal_point`` until none are left.

    Rows are appended to the file named by the module-level ``output_name``.
    Each row holds: cluster id, company id, company, patent, r1, r2, cluster
    size, "; "-joined "(lat lng)" members, centre latitude, centre
    longitude, centre state, country, geographical relationship and the
    haversine distance to the local centre. The in-between row carries
    'N/A' for everything after the member list and is only written when
    there are in-between locations.

    Module-level state used: ``cluster_id`` (incremented per written row),
    ``api_calls`` (incremented per geocoding request) and ``output_name``.

    Args:
        ungrouped: List of location objects exposing ``get_lat()``,
            ``get_lng()``, ``get_country()``, ``get_state()`` and
            ``set_state()``.
        patent: Patent identifier written to every row.
        r1: Local clustering radius.
        r2: Distance beyond which a location counts as remote.
        company_id: Company identifier written to every row.
        company: Company name written to every row.

    Returns:
        None.
    """
    global api_calls
    global cluster_id

    def as_coord_strings(locations):
        # "(lat lng)" text for each location, in iteration order
        return [
            '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            for loc in locations
        ]

    # get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    # get the remote locations
    remote_set = create_remote_list(local_center, ungrouped, r2)
    # find the locations that are not local and not remote
    inbetween = [
        loc for loc in ungrouped
        if loc not in remote_set and loc not in local_set
    ]

    # get the country data
    country = local_center.get_country()
    state = local_center.get_state()
    if country != 'US':
        local_center.set_state("N/A")
    elif state == '':
        # US centre without a state: ask the Locations REST endpoint for the
        # address at the centre's "lat,lng".
        # NOTE(review): the API key travels over plain http here.
        coord2 = str(local_center.get_lat()) + "," + str(
            local_center.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })
        api_calls += 1
        data2 = response2.json()

        try:
            found_state = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['adminDistrict'])
        except (KeyError, IndexError, TypeError):
            # the response did not contain an address with a state
            local_center.set_state("N/A")
        else:
            local_center.set_state(found_state)

    # list of (centre, members) for each remote group
    remote_groups = []
    while len(remote_set) > 0:
        # get the largest remote group, add it to the remote groups and
        # remove its members from the ungrouped remotes
        (remote_center, remote_group) = get_focal_point(remote_set, r1)
        if not remote_group:
            # Nothing left that can be clustered; without this guard the
            # loop would never shrink remote_set and would spin forever.
            break
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)

    with open(output_name, 'a', newline="\n", encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')

        # local cluster
        csv_writer.writerow([
            cluster_id, company_id, company, patent, r1, r2,
            len(local_set),
            '; '.join(as_coord_strings(local_set)),
            local_center.get_lat(),
            local_center.get_lng(),
            local_center.get_state(),
            country,
            'domestic',
            'N/A',
        ])
        cluster_id += 1

        # nonlocal (in-between) cluster, only when it has members
        if inbetween:
            csv_writer.writerow([
                cluster_id, company_id, company, patent, r1, r2,
                len(inbetween),
                '; '.join(as_coord_strings(inbetween)),
                'N/A', 'N/A', 'N/A', 'N/A', 'N/A', 'N/A',
            ])
            cluster_id += 1

        # order the remote groups by size, largest first
        remote_group_list = []
        for (center, group) in remote_groups:
            remote_group_list.append((center, group, len(group)))
            # Re-sorting after every append is redundant for the size
            # ordering, but it determines the order of equal-sized groups;
            # kept so the row order of existing output does not change.
            remote_group_list.sort(key=lambda tup: tup[2])
            remote_group_list.reverse()

        for (center, group, _size) in remote_group_list:
            (_c1, c2, rel) = generate_geo_relationship(country, center)
            csv_writer.writerow([
                cluster_id, company_id, company, patent, r1, r2,
                len(group),
                '; '.join(as_coord_strings(group)),
                center.get_lat(),
                center.get_lng(),
                center.get_state(),
                c2,
                rel,
                formulas.haversine(local_center.get_lat(),
                                   local_center.get_lng(),
                                   center.get_lat(), center.get_lng()),
            ])
            cluster_id += 1
Ejemplo n.º 10
0
def output_each_patent(ungrouped, company, company_id, base_radius,
                       coverage_percentage):
    """Cluster a company's patent groups and append one summary TSV row.

    The headquarters cluster is the largest neighbourhood found by
    ``get_focal_point`` with ``base_radius``; every other location is
    treated as remote and repeatedly clustered with the same radius.

    The row appended to the file named by the module-level ``output_name``
    holds, in order: company, company id, headquarters size, remote size,
    total size, total number of clusters, number of remote clusters, base
    radius, coverage percentage, the local cluster dict, the list of remote
    cluster dicts (or 'N/A'), the local coverage dict and the list of remote
    coverage dicts (or 'N/A').

    Progress information (company, input size, remaining remote count) is
    printed to stdout. The module-level ``api_calls`` counter is incremented
    for each geocoding request.

    Args:
        ungrouped: List of location objects exposing ``get_lat()``,
            ``get_lng()``, ``get_country()``, ``get_state()``,
            ``set_state()``, ``get_group_id()`` and ``get_locations()``.
        company: Company name.
        company_id: Company identifier.
        base_radius: Clustering radius.
        coverage_percentage: Share passed (through ``fast_real``) to
            ``percent_coverage_generator``.

    Returns:
        None.
    """
    # api_calls is rebound below, so it must be declared global; without
    # this the increment raises UnboundLocalError.
    global api_calls

    print(company)
    print(len(ungrouped))
    # get the clusters centered around the headquarters
    (hq, hq_set) = get_focal_point(ungrouped, base_radius)

    # get the remote locations - assume that every cluster not in the
    # headquarters's radius is a remote location
    remote_set = [i for i in ungrouped if not i in hq_set]

    # row to write (counts are taken before remote_set is consumed below)
    row = [
        company, company_id,
        len(hq_set),
        len(remote_set),
        len(hq_set) + len(remote_set),
        str(base_radius),
        str(coverage_percentage)
    ]

    # get local country and state
    country = hq.get_country()
    state = hq.get_state()
    if country != 'US':
        hq.set_state("N/A")
    elif state == '':
        # US headquarters without a state: ask the Locations REST endpoint
        # for the address at the headquarters' "lat,lng".
        # NOTE(review): the API key travels over plain http here.
        coord2 = str(hq.get_lat()) + "," + str(hq.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })

        api_calls += 1

        data2 = response2.json()
        try:
            state = str(data2['resourceSets'][0]['resources'][0]['address']
                        ['adminDistrict'])
        except (KeyError, IndexError, TypeError):
            # the response did not contain an address with a state
            hq.set_state("N/A")
        else:
            hq.set_state(state)

    # list of (centre, members) for each remote group
    remote_groups = []
    while len(remote_set) > 0:
        # get the largest remote group, add it to the remote groups and
        # remove its members from the ungrouped remotes
        (remote_center,
         remote_group) = get_focal_point(remote_set, base_radius)
        print(len(remote_set))
        if not remote_group:
            # Only locations that cannot be clustered remain (e.g. their
            # distances cannot be computed); without this guard remote_set
            # would never shrink and the loop would spin forever.
            break
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)

    with open(output_name, 'a', newline="\n",
              encoding='utf-8-sig') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')

        # dict for local_cluster
        # NOTE(review): 'state' is the local variable, which is NOT reset
        # when hq.set_state("N/A") is called above - confirm this is wanted.
        local_ids = [loc.get_group_id() for loc in hq_set]
        row.append({
            'number_of_patent_groups_in_cluster': len(local_ids),
            'locations_id': '; '.join(local_ids),
            'center_lat': hq.get_lat(),
            'center_lng': hq.get_lng(),
            'state': state,
            'country': country,
            'geographical_relationship': 'domestic',
            'haversine_distance_to_local': 'N/A'
        })

        # order the remote groups by size, largest first
        remote_group_list = []
        for (center, group) in remote_groups:
            remote_group_list.append((center, group, len(group)))
            # Re-sorting after every append is redundant for the size
            # ordering, but it determines the order of equal-sized groups;
            # kept so the order in existing output does not change.
            remote_group_list.sort(key=lambda tup: tup[2])
            remote_group_list.reverse()

        # this is the number of remote groups of clusters
        num_of_remote_groups = len(remote_group_list)
        total_num_of_groups = num_of_remote_groups + 1

        # add that information to the row, right after the size counts
        row.insert(5, total_num_of_groups)
        row.insert(6, num_of_remote_groups)

        write_remote_cluster = []
        for (center, group, _size) in remote_group_list:
            remote_ids = [loc.get_group_id() for loc in group]
            (_c1, c2, rel) = generate_geo_relationship(country, center)
            # dict for remote_cluster
            # NOTE(review): the key 'geographical_relationship:' carries a
            # trailing colon that the local dict's key does not; left as-is
            # because consumers of the file may depend on it.
            write_remote_cluster.append({
                'number_of_patent_groups_in_cluster': len(group),
                'locations_id': '; '.join(remote_ids),
                'center_lat': center.get_lat(),
                'center_lng': center.get_lng(),
                'state': center.get_state(),
                'country': c2,
                'geographical_relationship:': rel,
                'haversine_distance_to_local':
                formulas.haversine(hq.get_lat(), hq.get_lng(),
                                   center.get_lat(), center.get_lng())
            })
        if write_remote_cluster:
            row.append(write_remote_cluster)
        else:
            row.append('N/A')

        # percentage coverage for local
        coverage = fast_real(coverage_percentage)
        row.append(percent_coverage_generator(hq, hq_set, coverage))

        # percentage coverage for remote
        if num_of_remote_groups == 0:
            row.append('N/A')
        else:
            row.append([
                percent_coverage_generator(center, group, coverage)
                for (center, group, _size) in remote_group_list
            ])
        csv_writer.writerow(row)
def output_each_patent(ungrouped, patent, r1, r2):
    """Cluster one patent's inventor locations and append one CSV row.

    The locations are split into a local cluster (``get_focal_point`` with
    ``r1``), remote locations (``create_remote_set`` with ``r2``) and the
    ones in between. Remote locations are then repeatedly clustered with
    ``get_focal_point`` until none are left.

    The row appended to ``outputs/output.csv`` holds, in order: patent id,
    number of inventors, number of local inventors, number of remote
    inventors, number of clusters (local + remote), r1, r2, the local
    cluster dict, the in-between cluster dict, then one dict per remote
    cluster ordered by increasing distance from the local centre.

    Args:
        ungrouped: Collection of hashable ``(lat, lng, id)`` tuples.
        patent: Patent identifier.
        r1: Local clustering radius.
        r2: Distance beyond which a location counts as remote.

    Returns:
        None.
    """

    def as_coord_strings(locations):
        # "(lat,lng)" text for each (lat, lng, id) record
        return [
            '(' + str(lat) + ',' + str(lon) + ')'
            for (lat, lon, _loc_id) in locations
        ]

    # get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    # get the remote locations
    remote_set = create_remote_set(local_center, ungrouped, r2)
    # find the locations that are not local and not remote
    inbetween = set(ungrouped) - remote_set - local_set

    # row to write (counts are taken before remote_set is consumed below)
    row = [patent, len(ungrouped), len(local_set), len(remote_set)]

    # get local country: ask the Locations REST endpoint for the address at
    # the local centre's "lat,lng"
    # NOTE(review): the API key is hard-coded (and ends with a space) and is
    # sent over plain http; it should come from configuration instead.
    coord1 = str(local_center[0]) + "," + str(local_center[1])
    response1 = requests.get(
        "http://dev.virtualearth.net/REST/v1/Locations/" + coord1,
        params={
            "key":
            "AjhzSUKjNFFV0ckKVCV64tSLhw_EWSlN6LP9UPiWdEJDRMZn3Vm17HtoSclZZfO_ ",
        })
    data1 = response1.json()
    try:
        country1 = str(data1['resourceSets'][0]['resources'][0]['address']
                       ['countryRegion'])
    except (KeyError, IndexError, TypeError):
        # the response did not contain an address with a country
        country1 = "N/A"

    # list of (centre, members) for each remote group
    remote_groups = []
    while len(remote_set) > 0:
        # get the largest remote group, add it to the remote groups and
        # remove its members from the ungrouped remotes
        remote_group = get_focal_point(remote_set, r1)
        if not remote_group[1]:
            # Nothing left that can be clustered; without this guard the
            # loop would never shrink remote_set and would spin forever.
            break
        remote_groups.append(remote_group)
        remote_set -= remote_group[1]

    row.append(1 + len(remote_groups))  # number of clusters
    row.append(r1)  # local radius
    row.append(r2)  # remote radius
    with open('outputs/output.csv', 'a', newline="\n",
              encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter=',')

        # dict for local_cluster
        row.append({
            'number_of_inventors_in_cluster': len(local_set),
            'locations': '; '.join(as_coord_strings(local_set)),
            'center_lat': local_center[0],
            'center_lng': local_center[1],
            'country': country1,
            'geographical_relationship:': 'domestic',
            'haversine_distance_to_local': 'N/A'
        })

        # dict for nonlocal_cluster
        row.append({
            'number_of_inventors_in_cluster': len(inbetween),
            'locations': '; '.join(as_coord_strings(inbetween)),
            'center_lat': 'N/A',
            'center_lng': 'N/A',
            'country': 'N/A',
            'geographical_relationship:': 'N/A',
            'haversine_distance_to_local': 'N/A'
        })

        # sort remote groups by distance away from local focal point; the
        # sort is stable, so equally distant groups keep discovery order
        remote_group_list = [
            (coordinates, group,
             formulas.haversine(local_center[0], local_center[1],
                                coordinates[0], coordinates[1]))
            for (coordinates, group) in remote_groups
        ]
        remote_group_list.sort(key=lambda tup: tup[2])

        for (coordinates, group, dist) in remote_group_list:
            (_c1, c2, rel) = generate_geo_relationship(country1, coordinates)
            # dict for remote_cluster
            row.append({
                'number_of_inventors_in_cluster': len(group),
                'locations': '; '.join(as_coord_strings(group)),
                'center_lat': coordinates[0],
                'center_lng': coordinates[1],
                'country': c2,
                'geographical_relationship:': rel,
                # same distance that was used for the ordering above
                'haversine_distance_to_local': dist
            })
        csv_writer.writerow(row)
Ejemplo n.º 12
0
import formulas
# Scratch notes kept from development: splitting a "; "-joined list of
# "(lat lng)" strings back into coordinate pairs.
#
#     word = "(32.2217 -110.9258); (32.2217 -110.9258)"
#     list = word.split("; ")
#     print(list)
#     for s in list:
#         oc = s[s.find("(")+1:s.find(")")]
#         coord = oc.split(" ")
#     print(coord)

# Manual check: print the haversine distance between two sample coordinates.
print(
    formulas.haversine(
        37.9375, -107.8117,  # first point (lat, lng)
        40.7975, -81.165,  # second point (lat, lng)
    )
)
Ejemplo n.º 13
0
def output_each_patent(ungrouped, company, id, r1, r2):
    """Cluster a company's locations and append one summary TSV row.

    The locations are split into a local cluster (``get_focal_point`` with
    ``r1``), remote locations (``create_remote_list`` with ``r2``) and the
    ones in between. Remote locations are then repeatedly clustered with
    ``get_focal_point`` until none are left.

    The row appended to ``outputs/groupings.tsv`` holds, in order: company,
    id, number of locations, number of local locations, number of remote
    locations, number of clusters (local + remote), r1, r2, the local
    cluster dict, the in-between cluster dict (or 'N/A'), and the list of
    remote cluster dicts (or 'N/A').

    Args:
        ungrouped: List of location objects exposing ``get_lat()``,
            ``get_lng()``, ``get_country()``, ``get_state()`` and
            ``set_state()``.
        company: Company name.
        id: Company identifier.
        r1: Local clustering radius.
        r2: Distance beyond which a location counts as remote.

    Returns:
        None.
    """

    def as_coord_strings(locations):
        # "(lat lng)" text for each location, in iteration order
        return [
            '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            for loc in locations
        ]

    # get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    # get the remote locations
    remote_set = create_remote_list(local_center, ungrouped, r2)
    # find the locations that are not local and not remote
    inbetween = [
        loc for loc in ungrouped
        if loc not in remote_set and loc not in local_set
    ]

    # row to write (counts are taken before remote_set is consumed below)
    row = [
        company, id,
        len(ungrouped),
        len(local_set),
        len(remote_set)
    ]

    # get the country data
    country = local_center.get_country()
    state = local_center.get_state()
    if country != 'US':
        local_center.set_state("N/A")
    elif state == '':
        # US centre without a state: ask the Locations REST endpoint for the
        # address at the centre's "lat,lng".
        # NOTE(review): the API key travels over plain http here.
        coord2 = str(local_center.get_lat()) + "," + str(
            local_center.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })
        data2 = response2.json()
        try:
            state = str(data2['resourceSets'][0]['resources'][0]['address']
                        ['adminDistrict'])
        except (KeyError, IndexError, TypeError):
            # the response did not contain an address with a state
            local_center.set_state("N/A")
        else:
            local_center.set_state(state)

    # list of (centre, members) for each remote group
    remote_groups = []
    while len(remote_set) > 0:
        # get the largest remote group, add it to the remote groups and
        # remove its members from the ungrouped remotes
        (remote_center, remote_group) = get_focal_point(remote_set, r1)
        if not remote_group:
            # Nothing left that can be clustered; without this guard the
            # loop would never shrink remote_set and would spin forever.
            break
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)

    row.append(1 + len(remote_groups))  # number of clusters
    row.append(r1)  # local radius
    row.append(r2)  # remote radius
    with open('outputs/groupings.tsv', 'a', newline="\n",
              encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')

        # dict for local_cluster
        # NOTE(review): 'state' is the local variable, which is NOT reset
        # when local_center.set_state("N/A") is called above - confirm this
        # is wanted.
        row.append({
            'number_of_inventors_in_cluster': len(local_set),
            'locations': '; '.join(as_coord_strings(local_set)),
            'center_lat': local_center.get_lat(),
            'center_lng': local_center.get_lng(),
            'state': state,
            'country': country,
            'geographical_relationship': 'domestic',
            'haversine_distance_to_local': 'N/A'
        })

        # dict for nonlocal_cluster, or 'N/A' when there is none
        if inbetween:
            row.append({
                'number_of_inventors_in_cluster': len(inbetween),
                'locations': '; '.join(as_coord_strings(inbetween)),
                'center_lat': 'N/A',
                'center_lng': 'N/A',
                'state': 'N/A',
                'country': 'N/A',
                'geographical_relationship': 'N/A',
                'haversine_distance_to_local': 'N/A'
            })
        else:
            row.append('N/A')

        # order the remote groups by size, largest first
        remote_group_list = []
        for (center, group) in remote_groups:
            remote_group_list.append((center, group, len(group)))
            # Re-sorting after every append is redundant for the size
            # ordering, but it determines the order of equal-sized groups;
            # kept so the order in existing output does not change.
            remote_group_list.sort(key=lambda tup: tup[2])
            remote_group_list.reverse()

        write_remote_cluster = []
        for (center, group, _size) in remote_group_list:
            (_c1, c2, rel) = generate_geo_relationship(country, center)
            # dict for remote_cluster
            write_remote_cluster.append({
                'number_of_inventors_in_cluster': len(group),
                'locations': '; '.join(as_coord_strings(group)),
                'center_lat': center.get_lat(),
                'center_lng': center.get_lng(),
                'state': center.get_state(),
                'country': c2,
                'geographical_relationship': rel,
                'haversine_distance_to_local':
                formulas.haversine(local_center.get_lat(),
                                   local_center.get_lng(), center.get_lat(),
                                   center.get_lng())
            })
        if write_remote_cluster:
            row.append(write_remote_cluster)
        else:
            row.append('N/A')

        csv_writer.writerow(row)