# Imports assumed by this function; the import path for 'util' is an
# assumption (in ANUGA these output-reading helpers live in
# anuga.utilities.plot_utils).
import numpy
import scipy.spatial
from anuga.utilities import plot_utils as util


def get_approximate_discharge_timeseries(sww_filename,
                                         polylines,
                                         desired_ds=0.5,
                                         k_nearest_neighbours=1,
                                         search_mesh=True,
                                         verbose=True):
    """Given an sww_filename and a dictionary of 1D polylines, estimate the
    discharge timeseries through each polyline by interpolating the centroid
    uh/vh onto evenly spaced points on the polyline (with spacing ~ desired_ds),
    computing the flux normal to the line, and using the trapezoidal rule to
    integrate it.

    The interpolation of centroid uh/vh onto the polyline points can be based
    either on k-nearest-neighbours or on a direct search of the mesh
    triangles. The former can be faster and allows for smoothing, while the
    latter is often still fast enough and might be more accurate.

    The positive/negative discharge direction is determined from the polyline.
    Consider a river channel. If the polyline begins on the left bank and ends
    on the right bank (left/right defined when facing downstream) then
    discharge in the downstream direction is positive.

    WARNING: The result is only approximate, because ANUGA's internal edge
    fluxes are derived differently (with the Riemann solver), because the
    interpolation here does not follow ANUGA's, and because your transect
    might not be exactly perpendicular to the flow. None of the methods give
    an exact result at present.

    Errors can be significant where the solution is changing rapidly. It may
    be worth comparing multiple cross-sections in the vicinity of the site of
    interest (covering different mesh triangles, with slightly different
    orientations).

    @param sww_filename name of sww file
    @param polylines dictionary of polylines, e.g.
            polylines = {
                'Xsection1': [ [495., 1613.], [495., 1614.], [496., 1615.] ],
                'Xsection2': [ [496., 1614.], [4968., 1615.] ]
                        }
    @param desired_ds point spacing used for trapezoidal integration on
           polylines
    @param k_nearest_neighbours number of nearest neighbours used for
           interpolation of uh/vh onto polylines
    @param search_mesh If True AND k_nearest_neighbours=1, search the mesh
           triangles to find the one containing each point. Otherwise use
           nearest-neighbours on the triangle centroids to estimate the
           'nearest' triangle
    @param verbose print progress information if True

    @return a list of length 2 with the output_times as a numpy array, and a
            dictionary with the flow timeseries
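
    Example (hypothetical filename and coordinates, for illustration only):

        polylines = {'Xsection1': [[495., 1613.], [496., 1615.]]}
        times, flows = get_approximate_discharge_timeseries(
            'model_outputs.sww', polylines, desired_ds=0.5)
        # flows['Xsection1'] is a numpy array with one discharge value per
        # output time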

    """

    if search_mesh and (k_nearest_neighbours > 1):
        msg = 'k_nearest_neighbours must be 1 when search_mesh is True'
        raise ValueError(msg)

    # Two ways to associate transect points with triangle values:
    # 1) knn on centroids, or
    # 2) directly search for the mesh triangles containing transect points.
    # 1 can be faster and allows for smoothing, but 2 is usually more accurate.
    use_knn = (not search_mesh) or (k_nearest_neighbours != 1)

    if use_knn:
        # Centroids are used for knn
        p = util.get_centroids(sww_filename, timeSlices=0)
        sww_xy = numpy.vstack([p.x + p.xllcorner,
                               p.y + p.yllcorner]).transpose()
        point_index_kdtree = scipy.spatial.cKDTree(sww_xy)
    else:
        # Vertices are used for mesh search
        p = util.get_output(sww_filename, timeSlices=0)

    # To conserve memory read from netcdf directly
    from anuga.file.netcdf import NetCDFFile
    sww_nc = NetCDFFile(sww_filename)
    ud = sww_nc.variables['xmomentum_c']
    vd = sww_nc.variables['ymomentum_c']
    output_times = sww_nc.variables['time'][:]

    discharge_series = {}

    for pk in polylines.keys():

        if verbose: print(pk)

        pl_full = polylines[pk]

        for segment_num in range(len(pl_full) - 1):

            pl = [pl_full[segment_num], pl_full[segment_num + 1]]

            segment_length = ( (pl[0][0] - pl[1][0])**2 +\
                               (pl[0][1] - pl[1][1])**2 )**0.5

            # Normal vector
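            # (the segment tangent rotated 90 degrees counterclockwise). With
            # the left-bank-to-right-bank convention in the docstring, this
            # normal points downstream, so positive Q is downstream flow.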
            n1 = (pl[0][1] - pl[1][1]) / segment_length
            n2 = -(pl[0][0] - pl[1][0]) / segment_length

            # Approximate segment as npts points
            npts = int(numpy.ceil(segment_length / (desired_ds) + 1.0))
            gridXY = numpy.vstack([
                numpy.linspace(pl[0][0], pl[1][0], num=npts),
                numpy.linspace(pl[0][1], pl[1][1], num=npts)
            ]).transpose()

            # Actual distance between points
            ds = (numpy.diff(gridXY[:, 0])**2 +
                  numpy.diff(gridXY[:, 1])**2)**0.5
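            # Trapezoidal-rule weights: endpoints get half their single
            # adjacent spacing, interior points get half the sum of the two
            # adjacent spacings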
            ds_trapz = numpy.hstack([ds[0], (ds[0:-1] + ds[1:]), ds[-1]]) * 0.5

            if verbose: print('Finding triangles containing points')

            if use_knn:
                point_distance, point_indices = point_index_kdtree.query(
                    gridXY, k=k_nearest_neighbours)

            else:
                gridXY_offset = gridXY * 0.
                gridXY_offset[:, 0] = gridXY[:, 0] - p.xllcorner
                gridXY_offset[:, 1] = gridXY[:, 1] - p.yllcorner
                point_indices = numpy.zeros(gridXY.shape[0]).astype(int)
                # Provide the order to search the points (might be faster?)
                v1 = p.vols[:, 0]
                search_order_update_freq = 1
                for i in range(gridXY.shape[0]):

                    # For efficiency, we don't recompute the search order
                    # every time
                    if i % search_order_update_freq == 0:
                        # Update the mesh triangle search order
                        first_vertex_d2 = (p.x[v1] - gridXY_offset[i,0])**2 +\
                                          (p.y[v1] - gridXY_offset[i,1])**2
                        search_order = first_vertex_d2.argsort().tolist()
                        # Estimate how often we should update the triangle ordering
                        # Use "distance of point to vertex" / "point spacing"
                        # Crude
                        search_order_update_freq = \
                            int(numpy.ceil((first_vertex_d2[search_order[0]]**0.5)/ds[0]))
                    point_indices[i] =\
                        util.get_triangle_containing_point(p, gridXY_offset[i,:],
                            search_order = search_order)

            if verbose: print('Computing the flux')

            if k_nearest_neighbours == 1:
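                # ud[:] forces a full read of the netCDF variable; then pick
                # out the transect triangles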
                point_uh = ud[:][:, point_indices]
                point_vh = vd[:][:, point_indices]
            else:
                point_uh = numpy.zeros(
                    (len(output_times), len(point_indices[:, 0])))
                point_vh = numpy.zeros(
                    (len(output_times), len(point_indices[:, 0])))
                # Compute the inverse distance weighted uh/vh
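                # (value = sum_k(v_k / d_k) / sum_k(1 / d_k), computed for uh
                # and vh at every output time)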
                numerator = point_uh * 0.
                denominator = point_uh * 0.
                inv_dist = 1.0 / (point_distance + 1.0e-12)  # Avoid division by zero

                # uh
                for k in range(k_nearest_neighbours):
                    ud_data = ud[:][:, point_indices[:, k]]
                    for ti in range(len(output_times)):
                        numerator[ti, :] += ud_data[ti, :] * inv_dist[:, k]
                        denominator[ti, :] += inv_dist[:, k]
                point_uh = numerator / denominator

                #vh
                numerator *= 0.
                denominator *= 0.
                for k in range(k_nearest_neighbours):
                    vd_data = vd[:][:, point_indices[:, k]]
                    for ti in range(len(output_times)):
                        numerator[ti, :] += vd_data[ti, :] * inv_dist[:, k]
                        denominator[ti, :] += inv_dist[:, k]
                point_vh = numerator / denominator

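            # Integrate the normal flux along the transect with the
            # trapezoidal weights: Q(t) ~ sum_j (uh_j*n1 + vh_j*n2) * w_j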
            Q = [ ((point_uh[i,:]*n1 + point_vh[i,:]*n2)*ds_trapz).sum() \
                    for i in range(len(output_times)) ]

            if segment_num == 0:
                discharge_series[pk] = numpy.array(Q)
            else:
                discharge_series[pk] += numpy.array(Q)

    return [output_times, discharge_series]
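

# A minimal usage sketch, runnable as a script. The sww filename and transect
# coordinates are hypothetical placeholders; substitute your own model output
# and georeferenced transects.
if __name__ == '__main__':
    example_polylines = {
        'Xsection1': [[495., 1613.], [495., 1614.], [496., 1615.]],
    }
    times, flows = get_approximate_discharge_timeseries(
        'model_outputs.sww', example_polylines,
        desired_ds=0.5, k_nearest_neighbours=1, search_mesh=True)
    for name, q in flows.items():
        print('%s: peak discharge %g at t = %g s'
              % (name, q.max(), times[q.argmax()]))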