Python getFilepaths Examples, helpers.getFilepaths Python Examples

Example #1

0

Show file

File: xyz_filled_in_to_obj.py Project: jamesHargreaves12/DataVis_python

def processEntityFilledInToObj(name, forceOverwrite=False):
    """Turn the filled-in XYZ grid of ``name`` into an OBJ file.

    The XYZ_FILLED_IN CSV is read, each row is given a rectangular
    ``Polygon`` spanning from its own X/Y to the next grid bound, and the
    z-scaled frame is handed to ``dataframe_to_obj_2``.

    Args:
        name: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(name, FileType.XYZ_FILLED_IN,
                                             FileType.OBJ, forceOverwrite)

    log("Processing {}".format(filepath_in))
    df = pd.read_csv(filepath_in)
    log("Shape {}".format(df.shape))

    # Lookup tables: grid coordinate -> the next bound along the same axis.
    next_bound_x = get_next_bounds(np.sort(df['X'].unique()))
    next_bound_y = get_next_bounds(np.sort(df['Y'].unique()))

    log('Setting geometry column')

    def cell_polygon(row):
        # Rectangle covering one grid cell; Y is optionally rescaled by
        # UK_HEIGHT / UK_WIDTH when WITH_NAIVE_Y_SCALE is set.
        x_low, y_low = row['X'], row['Y']
        x_high, y_high = next_bound_x[x_low], next_bound_y[y_low]
        if WITH_NAIVE_Y_SCALE:
            y_low = y_low * UK_HEIGHT / UK_WIDTH
            y_high = y_high * UK_HEIGHT / UK_WIDTH
        return Polygon(get_rect_verts(x_low, x_high, y_low, y_high))

    cells = []
    for _, row in tqdm(df.iterrows()):
        cells.append(cell_polygon(row))
    df['geometry'] = cells

    log('Dataframe to obj')
    scaled = scale_z(df, Z_SCALE)
    dataframe_to_obj_2(scaled, filepath_out, material_name=MATERIAL_NAME)

Example #2

0

Show file

File: MSOA_common_to_geoDF.py Project: jamesHargreaves12/DataVis_python

def processMSOAEntityCommonToGeoDF(entityName, forceOverwrite=False):
    """Attach MSOA boundaries and population to an entity's common-form CSV.

    The entity file is left-joined with the MSOA boundary shapefile
    (re-projected to EPSG:4326) and with the population-per-MSOA table, and
    the ``Z``, ``geometry`` and ``population`` columns are written to the
    MSOA_GEO_DF output path.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(entityName,
                                             FileType.MSOA_COMMON_FORM,
                                             FileType.MSOA_GEO_DF,
                                             forceOverwrite)

    # Could convert these to be lazy loaded?
    print('Read 1')
    shapefile = "data/MSOAFiles/MSOA_2011_EW_BFC_shp/MSOA_2011_EW_BFC.shp"
    boundaries = gpd.read_file(shapefile)
    boundaries['geometry'] = boundaries['geometry'].to_crs('EPSG:4326')

    print('Read 2')
    population = pd.read_csv('data/MSOAFiles/popPerMSOA.csv')
    # Counts are stored as text with thousands separators, e.g. "1,234".
    trimmed_counts = population['All Ages'].str.strip()
    population['population'] = pd.to_numeric(
        trimmed_counts.str.replace(',', ''))

    print('Read 3')
    entity = pd.read_csv(filepath_in)

    print('Merging')
    with_boundaries = entity.merge(boundaries,
                                   left_on='MSOA code',
                                   right_on='MSOA11CD',
                                   how='left')
    merged = with_boundaries.merge(population, on='MSOA code', how='left')

    print('saving')
    output_columns = ['Z', 'geometry', 'population']
    gpd.GeoDataFrame(merged)[output_columns].to_csv(filepath_out, index=False)

Example #3

0

Show file

def processMSOAEntityToHeatMap(entityName, forceOverwrite=False):
    """Render the simplified MSOA polygons of ``entityName`` as a heat map.

    The MSOA_SIMPLIFIED CSV is loaded, its WKT ``geometry`` column is parsed
    into shapely objects, the frame is drawn with ``plot_uk`` and the figure
    is saved (axes hidden, tight bounding box) to the HEATMAP output path.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(entityName,
                                             FileType.MSOA_SIMPLIFIED,
                                             FileType.HEATMAP, forceOverwrite)

    frame = pd.read_csv(filepath_in)
    parsed_geometry = frame['geometry'].apply(shapely.wkt.loads)
    frame['geometry'] = parsed_geometry

    log("Plotting")
    plot_uk(frame)
    plt.axis("off")

    log("Saving figure")
    plt.savefig(filepath_out, bbox_inches='tight')

Example #4

0

Show file

def _writeXyzRows(filepath, rows):
    """Write ``(x, y, z)`` rows to ``filepath`` as CSV under an ``X,Y,Z`` header."""
    out_template = "{},{},{}\n"
    with open(filepath, 'w') as out_fp:
        out_fp.write(out_template.format("X", "Y", "Z"))
        for x, y, z in tqdm(rows):
            out_fp.write(out_template.format(x, y, z))


def processEntityPointToDistance(entityName, forceOverwrite=False):
    """Build distance-to-nearest-point XYZ grids for an entity.

    For every pixel of the reference XYZ grid the distance (in the grid's
    own coordinate units, plain Euclidean on lng/lat) to the closest point
    of the entity's POINT file is computed.  Two CSVs are produced: the
    distances themselves and an "inverted_" variant holding
    ``max_distance - distance``.  Each output is only written when it does
    not exist yet or when ``forceOverwrite`` is set.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths`` and used to decide
            whether existing outputs are rewritten.
    """
    filepath_in, filepath_out = getFilepaths(entityName, FileType.POINT,
                                             FileType.XYZ, forceOverwrite)
    folder, filename = os.path.split(filepath_out)
    filepath_out_inverted = os.path.join(folder, "inverted_" + filename)

    # Context managers make sure both input files are closed again.
    with open(filepath_in, 'r') as points_fp:
        points = json.load(points_fp)
    # De-duplicate the points; every duplicate would only cost time below.
    lng_lat_points = list({(float(point['lng']), float(point['lat']))
                           for point in points})

    example_filename = os.path.join(FILE_LOCATIONS[FileType.XYZ],
                                    'gbr_pd_2020_1km_UNadj_ASCII_XYZ.csv')
    with open(example_filename) as example_fp:
        # Materialised as a list so tqdm knows the total; [1:] skips headers.
        xyz_example = list(csv.reader(example_fp))[1:]

    xyz_result = []
    max_z = 0
    log('calculating distance for each pixel')
    for x, y, _ in tqdm(xyz_example):
        x = float(x)
        y = float(y)
        # This could be made much much faster but hey its quick enough for
        # now - takes like 3mins for high def
        min_sq_distance = min(
            ((x - lng) * (x - lng) + (y - lat) * (y - lat)
             for lng, lat in lng_lat_points),
            default=math.inf)
        min_distance = math.sqrt(min_sq_distance)
        max_z = max(max_z, min_distance)
        xyz_result.append((x, y, min_distance))

    log('writing result to {} and {}'.format(filepath_out,
                                             filepath_out_inverted))
    if not os.path.exists(filepath_out) or forceOverwrite:
        _writeXyzRows(filepath_out, xyz_result)

    if not os.path.exists(filepath_out_inverted) or forceOverwrite:
        _writeXyzRows(filepath_out_inverted,
                      [(x, y, max_z - z) for x, y, z in xyz_result])

Example #5

0

Show file

def processEntityObjToZip(entityName, forceOverwrite=False):
    """Gzip the OBJ file of ``entityName`` into its COMPRESSED output path.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    obj_path, gz_path = getFilepaths(entityName, FileType.OBJ, FileType.COMPRESSED, forceOverwrite)
    # Stream the bytes across so the whole OBJ never has to sit in memory.
    with open(obj_path, 'rb') as plain, gzip.open(gz_path, 'wb') as packed:
        shutil.copyfileobj(plain, packed)


# dir = '/Users/james_hargreaves/WebstormProjects/data-visualisation/src/data/objFiles/distanceTo'
# outDir = '/Users/james_hargreaves/WebstormProjects/data-visualisation/src/data/objFilesCompressed/distanceTo'
# todo = [x for x in os.listdir(dir) if not x.endswith('.gz')]
#
# for filename in tqdm(todo):
#     path = os.path.join(dir, filename)
#     with open(path, 'rb') as f_in:
#         with gzip.open(os.path.join(outDir, filename)+'.gz', 'wb') as f_out:
#             shutil.copyfileobj(f_in, f_out)

Example #6

0

Show file

def processEntityLowResToFilledIn(entityName,
                                  forceOverwrite=False,
                                  sourceOfTruth=SOURCE_OF_TRUTH_FILEPATH):
    """Fill the gaps of a low-resolution XYZ grid with zero-valued pixels.

    The XYZ_LOW_RES CSV is read into memory.  For every Y value present,
    ``UkLandmass.filter_included_all_y`` selects the X values that belong to
    the landmass; each selected pixel is written to the XYZ_FILLED_IN file
    with its known Z, or with 0 when the input had no value for it.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
        sourceOfTruth: passed to ``UkLandmass`` as the landmass reference.

    Raises:
        ValueError: if the input has fewer than two distinct X or Y values
            (no grid spacing can be derived).
    """
    filepath_in, filepath_out = getFilepaths(entityName, FileType.XYZ_LOW_RES,
                                             FileType.XYZ_FILLED_IN,
                                             forceOverwrite)
    ys = set()
    xs = set()
    xy_to_z = {}
    # The context manager closes the input once it has been consumed.
    with open(filepath_in) as in_fp:
        for x, y, z in tqdm(csv.reader(in_fp), position=0, leave=True):
            if x == 'X':
                # skip header row
                continue
            xs.add(x)
            ys.add(y)
            xy_to_z[(x, y)] = z

    xs_sorted = sorted(map(float, xs))
    ys_sorted = sorted(map(float, ys))
    # Smallest spacing between neighbouring grid lines on each axis.
    # fairly certain this threshold currently does nothing TODO check and remove
    y_threshold = min(
        upper - lower for lower, upper in zip(ys_sorted, ys_sorted[1:]))
    x_threshold = min(
        upper - lower for lower, upper in zip(xs_sorted, xs_sorted[1:]))

    uk_landmass = UkLandmass(sourceOfTruth, x_threshold)
    count = 0
    out_template = "{},{},{}\n"
    with open(filepath_out, 'w+') as out_fp:
        out_fp.write(out_template.format("X", "Y", "Z"))
        for y in tqdm(ys):
            valid_xs = uk_landmass.filter_included_all_y(
                xs_sorted, float(y), y_threshold, x_threshold)
            for x in valid_xs:
                # NOTE(review): the lookup relies on str(x) reproducing the
                # exact text of the input file (e.g. "1.5", not "1.50") -
                # confirm the input is always written in that form.
                z = xy_to_z.get((str(x), y))
                if z is None:
                    out_fp.write(out_template.format(x, y, 0))
                    count += 1
                else:
                    out_fp.write(out_template.format(x, y, z))
    log("0 value filled in for {} pixels".format(count))

Example #7

0

Show file

File: xyz_filled_in_to_obj.py Project: jamesHargreaves12/DataVis_python

def processMSOASimplifiedEntityInToObj(name, forceOverwrite=False):
    """Convert the simplified MSOA polygons of ``name`` into an OBJ file.

    The WKT ``geometry`` column of the MSOA_SIMPLIFIED CSV is parsed
    (and passed through ``convertPolygonToScaledXYScaledPolygon`` when
    ``WITH_NAIVE_Y_SCALE`` is set) before the z-scaled frame is handed to
    ``dataframe_to_obj_2``.

    Args:
        name: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(name, FileType.MSOA_SIMPLIFIED,
                                             FileType.OBJ, forceOverwrite)

    log("Processing {}".format(filepath_in))
    df = pd.read_csv(filepath_in)

    def parse_geometry(wkt_text):
        # WKT text -> shapely geometry, rescaled only in naive-Y-scale mode.
        geometry = shapely.wkt.loads(wkt_text)
        if WITH_NAIVE_Y_SCALE:
            return convertPolygonToScaledXYScaledPolygon(geometry)
        return geometry

    df['geometry'] = df['geometry'].apply(parse_geometry)
    log("Shape {}".format(df.shape))

    log('Dataframe to obj')
    scaled = scale_z(df, Z_SCALE)
    dataframe_to_obj_2(scaled, filepath_out, material_name=MATERIAL_NAME)

Example #8

0

Show file

def processEntityXYZToHeatMap(entityName, forceOverwrite=False):
    """Render the filled-in XYZ grid of ``entityName`` as a heat map image.

    Every row becomes a rectangular ``Polygon`` spanning from its X/Y to the
    next grid bound; the frame is drawn with ``plot_uk`` and saved (axes
    hidden, tight bounding box) to the HEATMAP output path.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(entityName,
                                             FileType.XYZ_FILLED_IN,
                                             FileType.HEATMAP, forceOverwrite)
    # filepath_in = 'data/tmp/low_res.csv'
    df = pd.read_csv(filepath_in)

    # Lookup tables: grid coordinate -> the next bound along the same axis.
    next_bound_x = get_next_bounds(np.sort(df['X'].unique()))
    next_bound_y = get_next_bounds(np.sort(df['Y'].unique()))

    def cell_polygon(row):
        corners = get_rect_verts(row.X, next_bound_x[row.X],
                                 row.Y, next_bound_y[row.Y])
        return Polygon(corners)

    df['geometry'] = df.apply(cell_polygon, axis=1)

    log("Plotting")
    plot_uk(df)
    plt.axis('off')

    log("Saving figure")
    plt.savefig(filepath_out, bbox_inches='tight')

Example #9

0

Show file

File: postcodeToAverageXyz.py Project: jamesHargreaves12/DataVis_python

def processPostcodeToMeanXYZLowRes(name, forceOverwrite=False):
    """Aggregate price-paid records into a low-res XYZ grid of mean prices.

    Records without a postcode are dropped (and the loss is logged), then
    ``postCodeDataToAggregateXYZ`` aggregates the ``pricePaid`` field using
    the arithmetic mean and writes the XYZ_LOW_RES output.

    Args:
        name: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(name, FileType.POSTCODE,
                                             FileType.XYZ_LOW_RES,
                                             forceOverwrite)
    logInfo('Reading price paid files')
    df = getDataFrameFromDataFile(filepath_in)
    initialNumberOfRecords = df.shape[0]
    # throw away any rows which don't have a post code ~0.40% are thrown away for the 2020 data
    df = df[df['postcode'].notna()]
    filterNoPostcodeNumRecord = df.shape[0]

    removedRecords = initialNumberOfRecords - filterNoPostcodeNumRecord
    # An empty input must not crash the log line with a division by zero.
    if initialNumberOfRecords:
        removedPercent = round(100 * removedRecords / initialNumberOfRecords,
                               2)
    else:
        removedPercent = 0.0
    logInfo('Removed {} out of {} ({}%)records due to missing postcode'.format(
        removedRecords, initialNumberOfRecords, removedPercent))

    postCodeDataToAggregateXYZ(df,
                               filepath_out,
                               activeFieldName='pricePaid',
                               aggregateMethod=lambda x: sum(x) / len(x))

Example #10

0

Show file

def skipLowResToFilledIn(entityName, forceOverwrite=False):
    """Copy the low-res XYZ file unchanged to the filled-in location.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    paths = getFilepaths(entityName, FileType.XYZ_LOW_RES,
                         FileType.XYZ_FILLED_IN, forceOverwrite)
    low_res_path, filled_in_path = paths
    shutil.copyfile(low_res_path, filled_in_path)

Example #11

0

Show file

        out_fp.write(out_template.format("X", "Y", "Z"))
        for y in tqdm(ys):
            valid_xs = uk_landmass.filter_included_all_y(
                xs_sorted, float(y), y_threshold, x_threshold)
            for x in valid_xs:
                z = xy_to_z.get((str(x), y), 'nothing found')
                if z == 'nothing found':
                    out_fp.write(out_template.format(x, y, 0))
                    count += 1
                else:
                    out_fp.write(out_template.format(x, y, z))
    log("0 value filled in for {} pixels".format(count))


def processEntityLowResToFilledInEnglandAndWales(entityName,
                                                 forceOverwrite=False):
    """Run ``processEntityLowResToFilledIn`` with ``SOURCE_OF_TRUTH_ENGLAND_WALES``.

    Args:
        entityName: entity name, passed through unchanged.
        forceOverwrite: passed through unchanged.
    """
    source = SOURCE_OF_TRUTH_ENGLAND_WALES
    processEntityLowResToFilledIn(entityName, forceOverwrite, source)


if __name__ == '__main__':
    # Fill in every low-res file that has no filled-in output yet (or all of
    # them when FORCE_OVERWRITE is set).
    for filename in os.listdir(FILE_LOCATIONS[FileType.XYZ_LOW_RES]):
        entityName, _ = os.path.splitext(filename)
        _, filepath_out = getFilepaths(entityName, FileType.XYZ_LOW_RES,
                                       FileType.XYZ_FILLED_IN)

        if FORCE_OVERWRITE or not os.path.exists(filepath_out):
            processEntityLowResToFilledIn(entityName, FORCE_OVERWRITE)

Example #12

0

Show file

        max_z = max(max_z, min_distance)
        xyz_result.append((x, y, min_distance))

    log('writing result to {} and {}'.format(filepath_out,
                                             filepath_out_inverted))
    if not os.path.exists(filepath_out) or forceOverwrite:
        out_template = "{},{},{}\n"
        with open(filepath_out, 'w') as out_fp:
            out_fp.write(out_template.format("X", "Y", "Z"))
            for x, y, z in tqdm(xyz_result):
                out_fp.write(out_template.format(x, y, z))

    if not os.path.exists(filepath_out_inverted) or forceOverwrite:
        out_template = "{},{},{}\n"
        with open(filepath_out_inverted, 'w') as out_fp:
            out_fp.write(out_template.format("X", "Y", "Z"))
            for x, y, z in tqdm(xyz_result):
                out_fp.write(out_template.format(x, y, max_z - z))


if __name__ == '__main__':
    # Compute the distance grid for every point file that has no XYZ output
    # yet (or for all of them when FORCE_OVERWRITE is set).
    for filename in os.listdir(FILE_LOCATIONS[FileType.POINT]):
        entityName, _ = os.path.splitext(filename)
        _, filepath_out = getFilepaths(entityName, FileType.POINT,
                                       FileType.XYZ, True)

        if FORCE_OVERWRITE or not os.path.exists(filepath_out):
            processEntityPointToDistance(entityName, FORCE_OVERWRITE)

Example #13

0

Show file

def processMSOAEntityMergedToSimplified(entityName, forceOverwrite=False):
    """Simplify the outlines of the merged MSOA polygons of ``entityName``.

    The MSOA_MERGED CSV (a header row, then ``Z, WKT geometry`` rows) is
    processed in three phases:

    1. Every polygon is loaded (multi-part geometries are split into their
       parts), cleaned with ``roundAndRemoveInvalidPoints`` and its exterior
       vertices are indexed both by polygon key and by vertex.
    2. For every pair of polygons sharing vertices, the shared run of
       vertices is located and simplified once with ``simplifyLine``; the
       resulting segments are recorded for both polygons.
    3. Per polygon, the recorded border end points are kept, the remaining
       (non-shared) stretches are simplified separately, and the rebuilt
       polygon is written to the MSOA_SIMPLIFIED CSV as ``Z,geometry``.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(entityName, FileType.MSOA_MERGED, FileType.MSOA_SIMPLIFIED, forceOverwrite)

    print('reading and loading data structures')
    zs = {}  # polygon key -> Z value (kept as the CSV text)
    polys = {}  # polygon key -> cleaned polygon
    v_to_key = defaultdict(set)  # vertex -> keys of polygons containing it
    key_to_vs = defaultdict(list)  # polygon key -> exterior vertices in order
    key = 0
    with open(filepath_in, 'r') as fp:
        reader = csv.reader(fp)
        next(reader)  # skip headers
        for i, (z, p) in tqdm(enumerate(reader)):
            loaded = shapely.wkt.loads(p)
            # Multi-part geometries are split through .geoms; iterating the
            # geometry object itself is not supported by Shapely 2.
            ps = [loaded] if p.startswith('POLYGON') else list(loaded.geoms)
            for poly in ps:
                zs[key] = z
                poly = roundAndRemoveInvalidPoints(poly)
                polys[key] = poly
                for v in poly.exterior.coords:
                    key_to_vs[key].append(v)
                    v_to_key[v].add(key)
                key += 1
    maxKey = key

    key = None  # to get run time errors if it is read again
    print('calculating simplified lines between neighbours')
    # polygon key -> list of (start vertex, end vertex, vertex in between)
    poly_to_borders = defaultdict(list)
    poly_to_replaced_indecies = defaultdict(list)

    for k in tqdm(range(maxKey)):
        verts = key_to_vs[k]
        # Every polygon sharing at least one vertex with polygon k.
        neighbours = set()
        for v in verts:
            neighbours = neighbours.union(v_to_key[v])
        for nKey in neighbours:
            if nKey < k or nKey == k:
                continue  # will be handled during other iteration
            commonVerts = set(verts).intersection(key_to_vs[nKey])
            # need a way to detect if they are multiple lines
            start = None
            end = None
            start_i, end_i, commonVerts_ids = None, None, []
            # A shared run starts where the previous vertex is not shared and
            # ends where the next vertex (cyclically) is not shared.
            for i, v in enumerate(verts):
                if v in commonVerts:
                    commonVerts_ids.append(i)

                    if verts[(i + 1) % len(verts)] not in commonVerts:
                        end = v
                        end_i = i

                    if verts[i - 1] not in commonVerts:
                        start = v
                        start_i = i

            # NOTE(review): start_i / end_i stay None when every vertex of
            # the polygon is shared with the neighbour; the comparison below
            # then raises TypeError.
            if start_i > end_i:
                # The shared run wraps around the first vertex of the ring.
                b_indecies = list(range(start_i, len(verts))) + list(range(end_i + 1))
                boundaryPoints = verts[start_i:] + verts[:end_i + 1]
            else:
                b_indecies = list(range(start_i, end_i + 1))
                boundaryPoints = verts[start_i:end_i + 1]

            for i in b_indecies:
                assert (i in commonVerts_ids)
            # this only works if the do not cross the start point but if they don't then I assume that the "if not
            # closeEnough" statement will fire. If this statement returns true this means that the points between the
            # start and end point are not a continuous sequence of points on the perimeter of the polygon. There are
            # two reasons that this can happen the first is that there is a slight difference between some non significant
            # bit between the two neighbouring polygons (for this case we just proceed). The second case is where
            # there is two edges between the two polygons this case hasn't been encountered yet and so it is not handled.
            if len(commonVerts_ids) != len(b_indecies):
                print(len(commonVerts_ids), len(b_indecies))
                print([x for x in commonVerts_ids if x + 1 not in commonVerts_ids])
                # check all points closer than epsilon to the existing points which are in the range and if they are just
                # take the start and end this method runs the risk of ruining the corner of the next polygon along, if
                # this happens we should just remove some of the verts so that they wont be common but I think this will
                # be fine
                smallest = min(commonVerts_ids)
                largest = max(commonVerts_ids)
                threshold = 0.01  # compared against the squared distance
                for i in range(smallest, largest):
                    pointx, pointy = verts[i]
                    closeEnough = False
                    for commonx, commony in commonVerts:
                        dist = (pointx - commonx) ** 2 + (pointy - commony) ** 2
                        if dist < threshold:
                            closeEnough = True
                            break
                    if not closeEnough:
                        print(i, pointx, pointy)
                        assert False
                print('Happy that close enough')
                start_i = smallest
                end_i = largest
                b_indecies = list(range(start_i, end_i + 1))
                boundaryPoints = verts[start_i:end_i + 1]

            simplifiedBoundary_v, simplifyBoundary_i = simplifyLine(boundaryPoints, b_indecies)
            for i in range(len(simplifiedBoundary_v) - 1):
                start = simplifiedBoundary_v[i]
                start_i = simplifyBoundary_i[i]
                end = simplifiedBoundary_v[i + 1]
                end_i = simplifyBoundary_i[i + 1]
                # mid point is needed to check if it wraps around the start point
                mid_i = (start_i + end_i) // 2 if start_i < end_i else 0
                while mid_i not in commonVerts_ids:  # due to an error in a non significant bit
                    mid_i += 1
                poly_to_borders[k].append((start, end, verts[mid_i]))
                poly_to_borders[nKey].append((start, end, verts[mid_i]))

    print('writing out and simplifying map edges')
    template = "{},\"{}\"\n"
    with open(filepath_out, 'w+') as fp:
        fp.write("Z,geometry\n")
        # From this point On I will assume that the start / end points are contiguous
        for k in tqdm(range(maxKey)):
            neighbourBorderPoints = poly_to_borders[k]
            points = key_to_vs[k]
            # get the indecies of the start and end points
            neighbourBorderPointsIndecies = defaultdict(lambda: [inf, inf, inf])
            for pi, p in enumerate(points):
                for ni, (start, end, mid) in enumerate(neighbourBorderPoints):
                    if p == start:
                        neighbourBorderPointsIndecies[ni][0] = pi
                    if p == end:
                        neighbourBorderPointsIndecies[ni][1] = pi
                    if p == mid:
                        neighbourBorderPointsIndecies[ni][2] = pi

            # Every recorded border vertex must have been located in this
            # polygon; the start index is unpacked into `start` so that it is
            # the value actually being checked.
            for (start, end, mid) in neighbourBorderPointsIndecies.values():
                assert (start is not inf)
                assert (mid is not inf)
                assert (end is not inf)

            # work out if one edge wraps around start point
            doesWrap = False
            for start, end, mid in neighbourBorderPointsIndecies.values():
                smallest = min(start, end)
                largest = max(start, end)
                doesWrap |= not (smallest < mid < largest) and mid != inf

            onEdge = doesWrap
            outPoints = []
            remaining_indecies = sorted(
                [x for x, y, z in neighbourBorderPointsIndecies.values()] + [y for x, y, z in
                                                                             neighbourBorderPointsIndecies.values()])
            # print(k, doesWrap, neighbourBorderPointsIndecies.values())
            next_index = remaining_indecies.pop(0) if remaining_indecies else inf
            nonEdgePoints = []  # runs of vertices not on any shared border
            current_nonEdge = []
            for pi, p in enumerate(points):
                if pi == next_index:
                    if current_nonEdge:
                        current_nonEdge.append(p)
                        nonEdgePoints.append(current_nonEdge)
                        current_nonEdge = []
                    outPoints.append(p)
                    next_index = remaining_indecies.pop(0) if remaining_indecies else inf
                    if pi == next_index:  # ie end of one and start of another
                        next_index = remaining_indecies.pop(0) if remaining_indecies else inf
                    else:
                        onEdge = not onEdge
                        if not onEdge:
                            current_nonEdge.append(p)
                    continue
                elif not onEdge:
                    current_nonEdge.append(p)
            # wrap around non edge
            if not onEdge and current_nonEdge:
                if nonEdgePoints:
                    nonEdgePoints[0] = current_nonEdge + nonEdgePoints[0]
                else:
                    nonEdgePoints.append(current_nonEdge)

            for nonBoundary in nonEdgePoints:
                simplifiedBoundary_v, simplifyBoundary_i = simplifyLine(nonBoundary,
                                                                        [i for i, _ in enumerate(nonBoundary)])
                simplifiedPoints = []
                for i in range(len(simplifiedBoundary_v) - 1):
                    start = simplifiedBoundary_v[i]
                    end = simplifiedBoundary_v[i + 1]
                    simplifiedPoints.append(start)
                # Splice the simplified run in front of its closing vertex.
                # NOTE(review): `end` is left over from an earlier loop when
                # simplifyLine returns fewer than two vertices - confirm that
                # cannot happen.
                insertIndex = outPoints.index(end) if outPoints else 0
                outPoints = outPoints[:insertIndex] + simplifiedPoints + outPoints[insertIndex:]

            outPolygon = Polygon(outPoints)
            fp.write(template.format(zs[k], outPolygon))

    # withPlot is not defined in this function; it has to exist at module level.
    if withPlot:
        print('Plotting')
        plot(filepath_out)

    print('Finished')

Example #14

0

Show file

File: MSOA_geoDF_to_merged.py Project: jamesHargreaves12/DataVis_python

def ProcessMSOAEntityGeoDfToMerged(entityName, forceOverwrite=False):
    """Merge small MSOA polygons into a neighbour.

    The MSOA_GEO_DF CSV (a header row, then ``Z, WKT geometry, population``
    rows) is loaded and multi-part geometries are split into their parts.
    While the merge loop's area check is below ``minArea``, the smallest
    polygon that can still be merged is unioned with its smallest neighbour
    (a polygon sharing at least two of its vertices); the merged polygon
    gets the summed population and the population-weighted mean Z.
    Polygons without such a neighbour are written out unmerged.  The result
    is written to the MSOA_MERGED CSV as ``Z,geometry``.

    Args:
        entityName: entity name, resolved to file paths by ``getFilepaths``.
        forceOverwrite: forwarded to ``getFilepaths``.
    """
    filepath_in, filepath_out = getFilepaths(entityName, FileType.MSOA_GEO_DF, FileType.MSOA_MERGED, forceOverwrite)
    print('reading and loading data structures')
    zs = {}  # polygon key -> Z value
    key_to_pop = {}  # polygon key -> population
    polys = {}  # polygon key -> polygon
    v_to_key = defaultdict(set)  # vertex -> keys of polygons containing it
    key_to_vs = defaultdict(set)  # polygon key -> its exterior vertices
    a = []  # (key, area) of every polygon that has not been merged away
    key = 0
    ignore_keys = set()  # merged-away or neighbourless polygons
    with open(filepath_in, 'r') as fp:
        reader = csv.reader(fp)
        next(reader)  # skip headers
        for z, p, pop in tqdm(reader):
            loaded = shapely.wkt.loads(p)
            # Multi-part geometries are split through .geoms; iterating the
            # geometry object itself is not supported by Shapely 2.
            ps = [loaded] if p.startswith('POLYGON') else list(loaded.geoms)
            for poly in ps:
                zs[key] = float(z)
                key_to_pop[key] = float(pop)
                polys[key] = poly
                a.append((key, poly.area))
                for v in poly.exterior.coords:
                    key_to_vs[key].add(v)
                    v_to_key[v].add(key)
                key += 1

    sorted_as = sorted(a, key=lambda x: x[1])
    if sorted_as:
        print(sorted_as[-1])  # (key, area) of the largest polygon

    print('merging')
    i = 0
    # This is expensive at relatively deterministic for any z value so the result could be turned into a map and then
    # dramatically sped up. But once again this is an optimisation.
    # NOTE(review): the loop condition inspects the list left over from the
    # previous iteration, not a freshly sorted one - confirm this is intended.
    while sorted_as and sorted_as[0][1] < minArea:
        i += 1
        sorted_as = sorted(a, key=lambda x: x[1])
        # Take the smallest polygon that has not been given up on.
        smallestKey, smallestArea = sorted_as.pop(0)
        while smallestKey in ignore_keys and sorted_as:
            smallestKey, smallestArea = sorted_as.pop(0)
        if smallestKey in ignore_keys:
            # Only neighbourless polygons are left; nothing more to merge.
            break

        # Count how often each polygon shares a vertex with the smallest one.
        vs = key_to_vs[smallestKey]
        neighboursSeenOnce = set()
        neighboursSeenAtLeastTwice = set()
        for v in vs:
            for nKey in v_to_key[v]:
                if nKey in neighboursSeenOnce:
                    neighboursSeenAtLeastTwice.add(nKey)
                    neighboursSeenOnce.remove(nKey)
                elif nKey not in neighboursSeenAtLeastTwice:
                    neighboursSeenOnce.add(nKey)
        neighboursSeenAtLeastTwice.remove(smallestKey)
        neighboursSeenAtLeastTwice -= ignore_keys
        if len(neighboursSeenAtLeastTwice) == 0:
            ignore_keys.add(smallestKey)
            continue
        # Pick the neighbour with the smallest area as the merge partner.
        smallestNeighbourKey = neighboursSeenAtLeastTwice.pop()
        smallestNeighbourArea = polys[smallestNeighbourKey].area
        for nKey in neighboursSeenAtLeastTwice:
            area = polys[nKey].area
            if smallestNeighbourArea > area:
                smallestNeighbourArea = area
                smallestNeighbourKey = nKey
        newPoly = cascaded_union([polys[smallestKey], polys[smallestNeighbourKey]])

        # The merged polygon is registered under a fresh key.
        polys[key] = newPoly
        popSmallest = key_to_pop[smallestKey]
        popNeighbour = key_to_pop[smallestNeighbourKey]
        key_to_pop[key] = popSmallest + popNeighbour
        # Population-weighted mean of the two Z values.
        zs[key] = (zs[smallestKey] * popSmallest + zs[smallestNeighbourKey] * popNeighbour) / (
                popSmallest + popNeighbour)
        # would likely be more efficient to also move the data from the larger data structures but that is an
        # optimisation we can do later.
        a = [x for x in a if x[0] not in [smallestKey, smallestNeighbourKey]] + [(key, newPoly.area)]
        for v in newPoly.exterior.coords:
            key_to_vs[key].add(v)
            v_to_key[v].add(key)
        key += 1
        ignore_keys.add(smallestKey)
        ignore_keys.add(smallestNeighbourKey)
        if i % 100 == 0:
            print(i, smallestArea)

    print('writing')
    template = "{},\"{}\"\n"
    with open(filepath_out, 'w+') as fp:
        fp.write("Z,geometry\n")
        for k in (polys.keys() - ignore_keys):
            fp.write(template.format(zs[k], polys[k]))

    # Can be useful for debugging:
    # with open('./data/tmp/testMergedOrder.csv','w+') as fp:
    #     fp.write("Z,geometry\n")
    #     for i,k in enumerate(polys.keys() - ignore_keys):
    #         fp.write(template.format(i, polys[k]))

    # withPlot is not defined in this function; it has to exist at module level.
    if withPlot:
        print('plotting')
        plot(filepath_out)

    print('finished')