Example #1
0
def _flash_durations_ms(t_start, t_end):
    """
    Compute the duration of each flash in milliseconds.

    Parameters
    ----------
    t_start, t_end : array of datetime objects
        First and last source time of each flash. Both arrays must have
        the same number of elements.

    Returns
    -------
    duration : masked array of floats
        ``t_end - t_start`` for each flash, in milliseconds.

    """
    duration = np.ma.zeros(t_end.size)
    for i in range(t_end.size):
        duration[i] = 1e3 * (t_end[i] - t_start[i]).total_seconds()
    return duration


def _plot_and_write_hist(values, bins_edges, fname_base, labelx, titl):
    """
    Plot the histogram of values and write it into a csv file.

    Parameters
    ----------
    values : array
        The data to histogram.
    bins_edges : array
        The edges of the histogram bins.
    fname_base : str
        Output file name without extension. '.png' is appended for the
        plot and '.csv' for the written histogram.
    labelx : str
        Label of the x axis of the plot.
    titl : str
        Title of the plot.

    """
    fname_list = plot_histogram(
        bins_edges, values, [fname_base + '.png'], labelx=labelx, titl=titl)
    print('Plotted ' + ' '.join(fname_list))

    _, hist_values = compute_histogram(values, None, bin_edges=bins_edges)
    hist, _ = np.histogram(hist_values, bins=bins_edges)
    fname = write_histogram(bins_edges, hist, fname_base + '.csv')
    print('Written ' + fname)


def _write_flash_histograms(fname_prefix, titl_prefix, euclidtype, neuclid,
                            area, t_start, t_end):
    """
    Produce the histograms describing a set of selected LMA flashes.

    Three histograms are plotted and written: the number of EUCLID
    strokes per LMA flash, the LMA flash area and the LMA flash duration.

    Parameters
    ----------
    fname_prefix : str
        Path and common leading part of the output file names.
    titl_prefix : str
        Text prepended to each plot title (may be empty).
    euclidtype : str
        Type of EUCLID stroke. Used in file names and titles.
    neuclid : array of ints
        Number of EUCLID strokes associated to each flash.
    area : array of floats
        Area of each flash.
    t_start, t_end : array of datetime objects
        First and last source time of each flash.

    """
    _plot_and_write_hist(
        neuclid, np.arange(-0.5, 21.5, 1),
        fname_prefix + '_n' + euclidtype + '_hist',
        'Number of EUCLID strokes per LMA flash',
        titl_prefix + 'EUCLID ' + euclidtype + ' strokes per LMA flash')

    _plot_and_write_hist(
        area, np.arange(0., 2010., 10.),
        fname_prefix + '_' + euclidtype + '_LMA_area_hist',
        'Area of LMA flash [km2]',
        titl_prefix + 'area of LMA flash with associated EUCLID ' +
        euclidtype + ' strokes')

    _plot_and_write_hist(
        _flash_durations_ms(t_start, t_end), np.arange(0., 1010., 10.),
        fname_prefix + '_' + euclidtype + '_LMA_duration_hist',
        'Duration of LMA flash [ms]',
        titl_prefix + 'Duration of LMA flash with associated EUCLID ' +
        euclidtype + ' strokes')


def main():
    """
    Select the LMA flashes that have associated EUCLID strokes.

    For each day given in the command line the EUCLID and the LMA data
    files are read. An LMA flash is kept when it has at least
    ``nsources_min`` sources and at least one EUCLID stroke of the
    requested type falls both within the time window of the flash
    (extended by ``anticipation`` and ``delay``) and within the minimum
    rotated rectangle enclosing the flash sources (enlarged by
    ``scale_factor`` and, if necessary, further enlarged until it reaches
    ``min_area``).

    For each day the sources of the accepted flashes are written into a
    csv file and histograms of the number of EUCLID strokes per flash,
    flash area and flash duration are produced. The same histograms are
    produced once more for all the days together.
    """
    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # positional arguments
    parser.add_argument('days',
                        nargs='+',
                        type=str,
                        help='Dates to process. Format YYYYMMDD')

    # keyword arguments
    parser.add_argument(
        '--euclid_basepath',
        type=str,
        default='/store/msrad/lightning/meteorage/',
        help='name of folder containing the EUCLID lightning data')

    parser.add_argument(
        '--lma_basepath',
        type=str,
        default='/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/',
        help='name of folder containing the LMA lightning data')

    parser.add_argument(
        '--lma_basename',
        type=str,
        default='Santis_data',
        help='base name of the files containing the LMA lightning data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='hydro,KDPc,dBZc,RhoHVc,TEMP,ZDRc',
        help='Name of the polarimetric moments to process. Coma separated')

    parser.add_argument(
        '--labels',
        type=str,
        default=('hydro [-],KDPc [deg/Km],dBZc [dBZ],RhoHVc [-],' +
                 'TEMP [deg C],ZDRc [dB]'),
        help='Labels in the csv file for each polarimetric variable')

    parser.add_argument(
        '--nsources_min',
        type=int,
        default=10,
        help='Minimum number of sources to consider the LMA flash valid')

    parser.add_argument(
        '--scale_factor',
        type=float,
        default=1.2,
        help='Factor by which the area covered by the LMA flash has to be ' +
        'enlarged to find EUCLID strokes')

    parser.add_argument(
        '--delay',
        type=float,
        default=100000.,
        help=
        'delay after end of LMA flash where to look for EUCLID strokes [micros]'
    )

    parser.add_argument(
        '--anticipation',
        type=float,
        default=100000.,
        help=
        'anticipation of the start of an LMA flash where to look for EUCLID strokes [micros]'
    )

    parser.add_argument(
        '--min_area',
        type=float,
        default=25.,
        help='Minimum size of the area where to look for an EUCLID stroke [km]'
    )

    parser.add_argument(
        '--euclidtype',
        type=str,
        default='CGt',
        help='Type of Euclid stroke. Can be: CGt, CGp, CGn, IC')

    args = parser.parse_args()

    print("====== EUCLID data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg, "====== EUCLID data processing finished: ")

    day_vec = [
        datetime.datetime.strptime(day, '%Y%m%d') for day in args.days]

    datatype_vec = args.datatypes.split(',')
    pol_vals_labels = args.labels.split(',')

    if len(datatype_vec) != len(pol_vals_labels):
        warn(
            str(len(datatype_vec)) + ' datatypes but ' +
            str(len(pol_vals_labels)) +
            ' labels. Their number must be equal')
        return

    # characteristics of the selected flashes accumulated over all days
    flashnr_sel_all = np.ma.asarray([], dtype=int)
    t_start_sel_all = np.ma.asarray([], dtype=datetime.datetime)
    t_end_sel_all = np.ma.asarray([], dtype=datetime.datetime)
    area_sel_all = np.ma.asarray([])
    neuclid_sel_all = np.ma.asarray([], dtype=int)

    for day in day_vec:
        day_str = day.strftime('%Y%m%d')
        fname_lma = args.lma_basepath + day_str + '_' + args.lma_basename + '.csv'
        fname_euclid = args.euclid_basepath + 'THX/THX' + day.strftime(
            '%y%j0000') + '.prd'

        print('Reading EUCLID data file ' + fname_euclid)
        (stroke_time, lon_euclid, lat_euclid, intens, ns, mode, intra, ax, ki2,
         ecc, incl, sind) = read_meteorage(fname_euclid)

        print('Reading LMA data file ' + fname_lma)
        (flashnr, time_data, time_in_flash, lat_lma, lon_lma, alt_lma, dBm,
         pol_vals_dict) = read_lightning_all(
             fname_lma, labels=pol_vals_labels)

        flashnr_first = np.unique(flashnr, return_index=False)

        # keep strokes of interest: intra == 0 for the cloud-to-ground
        # types, intra == 1 otherwise
        if args.euclidtype in ('CGt', 'CGp', 'CGn'):
            is_cg = intra == 0
            time_EU_filt = stroke_time[is_cg]
            lon_EU_filt = lon_euclid[is_cg]
            lat_EU_filt = lat_euclid[is_cg]

            if args.euclidtype in ('CGp', 'CGn'):
                # CGp keeps positive intensities, CGn negative ones
                intens_EU_filt = intens[is_cg]
                if args.euclidtype == 'CGp':
                    polarity = intens_EU_filt > 0.
                else:
                    polarity = intens_EU_filt < 0.

                time_EU_filt = time_EU_filt[polarity]
                lon_EU_filt = lon_EU_filt[polarity]
                lat_EU_filt = lat_EU_filt[polarity]
        else:
            is_ic = intra == 1
            time_EU_filt = stroke_time[is_ic]
            lon_EU_filt = lon_euclid[is_ic]
            lat_EU_filt = lat_euclid[is_ic]

        # get Swiss coordinates
        chy_EU, chx_EU, _ = wgs84_to_swissCH1903(lon_EU_filt,
                                                 lat_EU_filt,
                                                 np.zeros(lon_EU_filt.size),
                                                 no_altitude_transform=True)

        chy_lma, chx_lma, _ = wgs84_to_swissCH1903(lon_lma,
                                                   lat_lma,
                                                   alt_lma,
                                                   no_altitude_transform=True)

        # LMA sources belonging to the accepted flashes of the day
        flashnr_filt = np.asarray([], dtype=int)
        time_data_filt = np.asarray([], dtype=datetime.datetime)
        time_in_flash_filt = np.asarray([], dtype=float)
        lat_filt = np.asarray([], dtype=float)
        lon_filt = np.asarray([], dtype=float)
        alt_filt = np.asarray([], dtype=float)
        dBm_filt = np.asarray([], dtype=float)
        pol_vals_dict_filt = {
            label: np.ma.asarray([]) for label in pol_vals_labels}

        nflashes_time_rejected = 0
        nflashes_area_rejected = 0
        nflashes_insufficient_sources = 0
        nflashes_small_area_rejected = 0
        nstrokes_accepted = 0

        # characteristics of the accepted flashes of the day
        flashnr_sel = np.ma.asarray([], dtype=int)
        t_start_sel = np.ma.asarray([], dtype=datetime.datetime)
        t_end_sel = np.ma.asarray([], dtype=datetime.datetime)
        area_sel = np.ma.asarray([])
        neuclid_sel = np.ma.asarray([], dtype=int)

        for flash_ID in flashnr_first:
            # get LMA data of flash
            in_flash = flashnr == flash_ID
            flashnr_flash = flashnr[in_flash]

            if flashnr_flash.size < args.nsources_min:
                nflashes_insufficient_sources += 1
                continue

            time_data_flash = time_data[in_flash]
            time_in_flash_flash = time_in_flash[in_flash]
            lat_flash = lat_lma[in_flash]
            lon_flash = lon_lma[in_flash]
            alt_flash = alt_lma[in_flash]
            dBm_flash = dBm[in_flash]
            pol_vals_dict_flash = {
                key: vals[in_flash] for key, vals in pol_vals_dict.items()}

            chy_flash = chy_lma[in_flash]
            chx_flash = chx_lma[in_flash]

            # check if there are EUCLID strokes within LMA time
            t_start = time_data_flash[0] - datetime.timedelta(
                microseconds=args.anticipation)
            t_end = time_data_flash[-1] + datetime.timedelta(
                microseconds=args.delay)
            ind = np.where(
                np.logical_and(time_EU_filt >= t_start,
                               time_EU_filt <= t_end))[0]
            if ind.size == 0:
                nflashes_time_rejected += 1
                continue

            # check if there are EUCLID strokes within LMA area
            lon_EU_flash = lon_EU_filt[ind]
            lat_EU_flash = lat_EU_filt[ind]

            chy_EU_flash = chy_EU[ind]
            chx_EU_flash = chx_EU[ind]

            points_flash_lma = shapely.geometry.MultiPoint(
                list(zip(chy_flash, chx_flash)))
            points_EU = shapely.geometry.MultiPoint(
                list(zip(chy_EU_flash, chx_EU_flash)))

            rectangle_lma = points_flash_lma.minimum_rotated_rectangle
            # 1e-6 presumably converts m2 to km2 (see histogram label)
            area_lma = rectangle_lma.area * 1e-6

            if area_lma == 0:
                # degenerate rectangle: the flash cannot be scaled
                nflashes_small_area_rejected += 1

                # Plot position of the LMA sources of the rejected flash
                types = np.zeros(lat_flash.size)

                figfname = args.lma_basepath + 'rejected_' + day_str + '_' + str(
                    flash_ID) + '_LMA_EUCLID_' + args.euclidtype + '_pos.png'
                figfname = plot_pos(
                    lat_flash,
                    lon_flash,
                    types, [figfname],
                    cb_label='Type of detection 0: LMA, 1: EUCLID',
                    titl=day_str + ' ' + str(flash_ID) +
                    ' rejected LMA flash\nLMA and EUCLID ' + args.euclidtype +
                    ' positions')
                print('Plotted ' + ' '.join(figfname))

                continue

            # enlarge the rectangle; keep enlarging in steps of 0.1 until
            # the region of interest reaches the minimum area
            scale_factor = args.scale_factor
            roi_lma = shapely.affinity.scale(rectangle_lma,
                                             xfact=scale_factor,
                                             yfact=scale_factor)
            while roi_lma.area * 1e-6 < args.min_area:
                scale_factor += 0.1
                roi_lma = shapely.affinity.scale(rectangle_lma,
                                                 xfact=scale_factor,
                                                 yfact=scale_factor)

            if roi_lma.disjoint(points_EU):
                nflashes_area_rejected += 1
                continue

            # the flash is accepted: store its sources
            flashnr_filt = np.append(flashnr_filt, flashnr_flash)
            time_data_filt = np.append(time_data_filt, time_data_flash)
            time_in_flash_filt = np.append(time_in_flash_filt,
                                           time_in_flash_flash)
            lat_filt = np.append(lat_filt, lat_flash)
            lon_filt = np.append(lon_filt, lon_flash)
            alt_filt = np.append(alt_filt, alt_flash)
            dBm_filt = np.append(dBm_filt, dBm_flash)
            for key in pol_vals_dict_filt:
                pol_vals_dict_filt[key] = np.ma.append(
                    pol_vals_dict_filt[key], pol_vals_dict_flash[key])

            if not roi_lma.contains(points_EU):
                # only part of the strokes are inside the region of
                # interest: keep those by matching their coordinates
                points_in_roi = points_EU.intersection(roi_lma)
                if points_in_roi.geom_type == 'Point':
                    points_list = [points_in_roi]
                else:
                    # multi-part geometries are not directly iterable in
                    # recent shapely versions, .geoms works in all of them
                    points_list = list(points_in_roi.geoms)

                inds = []
                for point in points_list:
                    inds.extend(
                        np.where(
                            np.logical_and(chy_EU_flash == point.x,
                                           chx_EU_flash == point.y))[0])
                lon_EU_flash = lon_EU_flash[inds]
                lat_EU_flash = lat_EU_flash[inds]

            print(
                str(lon_EU_flash.size) + ' EUCLID ' + args.euclidtype +
                ' flashes for LMA flash ' + str(flash_ID))
            nstrokes_accepted += lon_EU_flash.size

            flashnr_sel = np.append(flashnr_sel, flash_ID)
            t_start_sel = np.append(t_start_sel, time_data_flash[0])
            t_end_sel = np.append(t_end_sel, time_data_flash[-1])
            area_sel = np.append(area_sel, area_lma)
            neuclid_sel = np.append(neuclid_sel, lon_EU_flash.size)

        # histograms of the flashes selected this day
        _write_flash_histograms(
            args.lma_basepath + day_str + '_' + args.lma_basename,
            day_str + ' ', args.euclidtype, neuclid_sel, area_sel,
            t_start_sel, t_end_sel)

        flashnr_filt_first = np.unique(flashnr_filt, return_index=False)
        print('N EUCLID strokes: ' + str(time_EU_filt.size))
        print('N EUCLID strokes in LMA flashes accepted: ' +
              str(nstrokes_accepted) + '\n')

        print('N LMA flashes: ' + str(flashnr_first.size))
        print('N LMA flashes insufficient sources: ' +
              str(nflashes_insufficient_sources))
        print('N LMA flashes time rejected: ' + str(nflashes_time_rejected))
        print('N LMA flashes small area rejected: ' +
              str(nflashes_small_area_rejected))
        print('N LMA flashes area rejected: ' + str(nflashes_area_rejected))
        print('N LMA flashes accepted: ' + str(flashnr_filt_first.size))
        print('N LMA sources accepted: ' + str(flashnr_filt.size) + '\n\n\n')

        # write the results in a file
        vals_list = [pol_vals_dict_filt[label] for label in pol_vals_labels]

        fname = args.lma_basepath + day_str + '_' + args.lma_basename + '_' + args.euclidtype + '.csv'
        write_ts_lightning(flashnr_filt, time_data_filt, time_in_flash_filt,
                           lat_filt, lon_filt, alt_filt, dBm_filt, vals_list,
                           fname, pol_vals_labels)
        print('written to ' + fname)

        flashnr_sel_all = np.append(flashnr_sel_all, flashnr_sel)
        t_start_sel_all = np.append(t_start_sel_all, t_start_sel)
        t_end_sel_all = np.append(t_end_sel_all, t_end_sel)
        area_sel_all = np.append(area_sel_all, area_sel)
        neuclid_sel_all = np.append(neuclid_sel_all, neuclid_sel)

    # histograms of the flashes selected over all days. The durations are
    # computed from the accumulated start/end times, not from those of the
    # last processed day
    _write_flash_histograms(
        args.lma_basepath + args.lma_basename, '', args.euclidtype,
        neuclid_sel_all, area_sel_all, t_start_sel_all, t_end_sel_all)
def main():
    """
    Accumulate point measurement histograms over several data directories.

    For each data type the histogram file
    ``ts_POINT_MEASUREMENT_hist_<datatype>.csv`` is looked for in each of
    the data directories. The histograms found are added together and the
    result is plotted and written into the base path given by
    ``--database``. Data types for which no histogram file exists in any
    directory are skipped with a warning.
    """
    # local import so that the function does not rely on the module
    # already importing warn
    from warnings import warn

    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # keyword arguments
    parser.add_argument('--database',
                        type=str,
                        default='/store/msrad/radar/pyrad_products/',
                        help='base path to the radar data')

    parser.add_argument(
        '--datadirs',
        type=str,
        default=(
            'mals_sha_windmills_point_psr_filtered_WM1_20200304-20200311,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200312-20200315,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200316-20200320,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200321-20200325'),
        help='directories containing data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='dBuZ,dBuZv,rcs_h,rcs_v,ZDRu,RhoHVu,uPhiDPu,Vu,Wu',
        help='Data types. Coma separated')

    args = parser.parse_args()

    print("====== PYRAD windmill data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== PYRAD windmill data processing finished: ")

    datadirs = args.datadirs.split(',')
    datatypes = args.datatypes.split(',')

    # Read periods of processing
    for datatype in datatypes:
        first_read = False
        for datadir in datadirs:
            # Read data time series files
            flist = glob.glob(args.database + datadir + '/' + datatype +
                              '_TS/TS/ts_POINT_MEASUREMENT_hist_' + datatype +
                              '.csv')
            if not flist:
                continue

            hist_aux, bin_edges_aux = read_histogram(flist[0])
            if not first_read:
                # the first histogram found sets the bins
                hist = hist_aux
                bin_edges = bin_edges_aux
                first_read = True
                continue

            hist += hist_aux

        if not first_read:
            # nothing to plot: hist and bin_edges would be undefined or
            # left over from the previous data type
            warn('No files for data type ' + datatype)
            continue

        # Histogram plots
        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        fname = args.database + 'ts_POINT_MEASUREMENT_hist_' + datatype + '.png'

        # bin centers computed from the width of the first bin
        bin_centers = bin_edges[:-1] + ((bin_edges[1] - bin_edges[0]) / 2.)
        fname = plot_histogram2(bin_centers,
                                hist, [fname],
                                labelx=get_colobar_label(
                                    field_dict, field_name),
                                titl=datatype)
        print('Plotted ' + ' '.join(fname))

        fname = args.database + 'ts_POINT_MEASUREMENT_hist_' + datatype + '.csv'
        fname = write_histogram(bin_edges, hist, fname)
        print('Written ' + fname)
Example #3
0
def main():
    """
    Accumulate windmill histograms by orientation, speed class and data type.

    For each combination of orientation, speed class (above / not above
    ``--vel_limit``) and data type, the corresponding ``*_hist.csv`` file
    is looked for in each data directory. The histograms found are added
    together and the result is plotted and written into the base path
    given by ``--database``. A warning is issued for the combinations
    without any file.
    """
    # command line interface
    arg_parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    arg_parser.add_argument(
        '--database',
        type=str,
        default='/store/msrad/radar/pyrad_products/',
        help='base path to the radar data')

    arg_parser.add_argument(
        '--datadirs',
        type=str,
        default=(
            'mals_sha_windmills_point_psr_filtered_WM1_20200304-20200311,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200312-20200315,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200316-20200320,'
            'mals_sha_windmills_point_psr_filtered_WM1_20200321-20200325'),
        help='directories containing data')

    arg_parser.add_argument(
        '--datatypes',
        type=str,
        default='dBuZ,dBuZv,rcs_h,rcs_v,uPhiDPu,RhoHVu,ZDRu,Vu,Wu',
        help='Data types. Coma separated')

    arg_parser.add_argument(
        '--orientations',
        type=str,
        default=
        '0,10,20,30,40,50,60,70,80,90,100,110,120,130,140,150,160,170,180,190,200,210,220,230,240,250,260,270,280,290,300,310,320,330,340,350',
        help='Orientation respect to radar')

    arg_parser.add_argument('--span', type=float, default=10., help='Span')

    arg_parser.add_argument(
        '--vel_limit', type=float, default=0., help='Velocity limit')

    args = arg_parser.parse_args()

    start_stamp = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    print("====== PYRAD windmill data processing started: %s" % start_stamp)
    atexit.register(_print_end_msg,
                    "====== PYRAD windmill data processing finished: ")

    datadir_list = args.datadirs.split(',')
    datatype_list = args.datatypes.split(',')
    ori_vec = np.asarray(args.orientations.split(','), dtype=float)

    # text fragments shared by file names and titles
    span_str = str(args.span)
    limit_str = str(args.vel_limit)
    speed_classes = ['speed_GT' + limit_str, 'speed_LE' + limit_str]

    for ori in ori_vec:
        ori_str = str(ori)
        for speed in speed_classes:
            for datatype in datatype_list:
                # common part of the input and output file names
                stem = (datatype + '_span' + span_str + '_ori' + ori_str +
                        '_' + speed + '_hist')

                # add up the histograms of all the data directories
                found = False
                for datadir in datadir_list:
                    matches = glob.glob(args.database + datadir + '/' +
                                        datatype + '_TS/TS/' + stem + '.csv')
                    if matches:
                        hist_new, edges_new = read_histogram(matches[0])
                        if found:
                            hist += hist_new
                        else:
                            # the first histogram found sets the bins
                            hist = hist_new
                            bin_edges = edges_new
                            found = True

                if not found:
                    warn('No files for orientation ' + ori_str + ' and ' +
                         speed)
                    continue

                # plot the accumulated histogram
                field_name = get_fieldname_pyart(datatype)
                field_dict = get_metadata(field_name)

                plot_title = (datatype + ' span ' + span_str + ' ori ' +
                              ori_str + ' ' + speed)

                # bin centers computed from the width of the first bin
                half_width = (bin_edges[1] - bin_edges[0]) / 2.
                bin_centers = bin_edges[:-1] + half_width
                plotted = plot_histogram2(
                    bin_centers,
                    hist, [args.database + stem + '.png'],
                    labelx=get_colobar_label(field_dict, field_name),
                    titl=plot_title)
                print('Plotted ' + ' '.join(plotted))

                # write the accumulated histogram
                written = write_histogram(
                    bin_edges, hist, args.database + stem + '.csv')
                print('Written ' + written)
Example #4
0
def main():
    """
    Plot and store histograms of radar data at LMA lightning sources.

    Command-line entry point. For each data type and each day it looks for
    the first file matching
    ``<basepath><YYYY-MM-DD>/*_traj/AT_FLASH/<YYYYMMDD>*_allflash_ts_trajlightning_<datatype>.csv``,
    reads it with ``read_lightning_traj`` and plots, next to the input file,
    one histogram with all the sources and one with the first source of each
    flash. Once all days are processed, the values accumulated over the days
    are plotted and their histograms written as csv files in ``basepath``.

    Command-line arguments
    ----------------------
    days : one or more str
        Dates to process, format YYYYMMDD.
    --basepath : str
        Folder containing the radar data. It is concatenated directly with
        the rest of the path, so it must end with a path separator.
    --datatypes : str
        Comma separated names of the data types to process.
    --steps : str
        Comma separated histogram steps, one per data type. The literal
        'None' is passed on to ``compute_histogram`` as ``None``.

    Returns
    -------
    None
        The function returns early (after a warning) if the number of data
        types and steps differ. Data types without any valid input file are
        skipped with a warning.
    """
    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # positional arguments
    parser.add_argument('days',
                        nargs='+',
                        type=str,
                        help='Dates to process. Format YYYYMMDD')

    # keyword arguments
    parser.add_argument(
        '--basepath',
        type=str,
        default='/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/',
        help='name of folder containing the radar data')

    parser.add_argument(
        '--datatypes',
        type=str,
        default='hydro,KDPc,dBZc,RhoHVc,TEMP,ZDRc',
        help='Name of the polarimetric moments to process. Coma separated')

    parser.add_argument('--steps',
                        type=str,
                        default='None,0.05,0.5,0.001,1.,0.1',
                        help='Step of the histogram for each data type')

    args = parser.parse_args()

    print("====== LMA trajectories radar data processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(
        _print_end_msg,
        "====== LMA trajectories radar data processing finished: ")

    day_vec = [
        datetime.datetime.strptime(day, '%Y%m%d') for day in args.days]

    datatype_vec = args.datatypes.split(',')
    steps = args.steps.split(',')

    if len(datatype_vec) != len(steps):
        warn(
            str(len(datatype_vec)) + ' datatypes but ' +
            str(len(steps)) + ' steps. Their number must be equal')
        return

    # the literal 'None' means that no explicit step is given
    step_list = [None if step == 'None' else float(step) for step in steps]

    for datatype, step in zip(datatype_vec, step_list):
        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        labelx = get_colobar_label(field_dict, field_name)

        values_list = []
        values_first_list = []
        flash_cnt = 0
        source_cnt = 0
        # bins of the last histogram computed for this data type. Reset for
        # each data type so that bins of another data type are never reused
        bins = None
        for day in day_vec:
            day_dir = day.strftime('%Y-%m-%d')
            day_str = day.strftime('%Y%m%d')

            fname_test = (args.basepath + day_dir + '/*_traj/AT_FLASH/' +
                          day_str + '*_allflash_ts_trajlightning_' + datatype +
                          '.csv')
            fname_list = glob.glob(fname_test)
            if not fname_list:
                warn('No file found in ' + fname_test)
                continue

            # only the first matching file is used
            fname = fname_list[0]

            # daily plots are stored next to the input file
            basepath_out = os.path.dirname(fname)
            fname_first_source = (basepath_out + '/' + day_str +
                                  '_firstsource_ts_trajlightning_' + datatype +
                                  '.png')

            fname_all_sources = (basepath_out + '/' + day_str +
                                 '_allsources_ts_trajlightning_' + datatype +
                                 '.png')

            print('\nReading file ' + fname)
            time_flash, flashnr, _, val_at_flash, _, _, _, _ = (
                read_lightning_traj(fname))

            # The plot titles index the first time stamp, so a file without
            # sources cannot be processed.
            # NOTE(review): the None check assumes that read_lightning_traj
            # signals a failed read by returning None - confirm
            if flashnr is None or flashnr.size == 0:
                warn('No sources in file ' + fname)
                continue

            print('N sources: ' + str(flashnr.size))
            source_cnt += flashnr.size

            # Plot all sources histogram
            bins, values = compute_histogram(val_at_flash,
                                             field_name,
                                             step=step)
            print('Valid values: ' + str(values.size))

            values_list.extend(values)

            plot_histogram(bins,
                           values, [fname_all_sources],
                           labelx=labelx,
                           titl=("Trajectory Histogram %s" %
                                 time_flash[0].strftime("%Y-%m-%d")))

            print("----- plot to '%s'" % fname_all_sources)

            # Get and plot first sources histogram. np.unique returns the
            # index of the first occurrence of each flash number
            flashnr_first, unique_ind = np.unique(flashnr, return_index=True)

            print('N flashes: ' + str(flashnr_first.size))
            flash_cnt += flashnr_first.size

            val_first = val_at_flash[unique_ind]
            time_flash_first = time_flash[unique_ind]

            bins, values = compute_histogram(val_first, field_name, step=step)

            values_first_list.extend(values)

            plot_histogram(bins,
                           values, [fname_first_source],
                           labelx=labelx,
                           titl=("Trajectory Histogram First Source %s" %
                                 time_flash_first[0].strftime("%Y-%m-%d")))

            print("----- plot to '%s'" % fname_first_source)

        print('N sources total: ' + str(source_cnt))
        print('N flashes total: ' + str(flash_cnt))

        if bins is None:
            # no file could be processed: there is nothing to accumulate and
            # no bins are available for the global histograms
            warn('No data found for data type ' + datatype)
            continue

        values_list = np.asarray(values_list)
        values_first_list = np.asarray(values_first_list)

        print('Valid values total: ' + str(values_list.size))
        print('Valid flashes total: ' + str(values_first_list.size))

        # Plot all sources histogram
        fname_all_sources = (args.basepath + 'allsources_ts_trajlightning_' +
                             datatype + '.png')
        plot_histogram(bins,
                       values_list, [fname_all_sources],
                       labelx=labelx,
                       titl="Trajectory Histogram All Sources")

        print("----- plot to '%s'" % fname_all_sources)

        # store histogram
        fname_all_sources = (args.basepath + 'allsources_ts_trajlightning_' +
                             datatype + '.csv')
        hist_values, _ = np.histogram(values_list, bins=bins)
        write_histogram(bins, hist_values, fname_all_sources)
        print('Written ' + fname_all_sources)

        # Plot first source histogram
        fname_first_source = (args.basepath + 'firstsource_ts_trajlightning_' +
                              datatype + '.png')
        plot_histogram(bins,
                       values_first_list, [fname_first_source],
                       labelx=labelx,
                       titl="Trajectory Histogram First Source")

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (args.basepath + 'firstsource_ts_trajlightning_' +
                              datatype + '.csv')
        hist_values_first, _ = np.histogram(values_first_list, bins=bins)
        write_histogram(bins, hist_values_first, fname_first_source)
        print('Written ' + fname_first_source)
def main():
    """
    Post-process LMA lightning data using a hard-coded configuration.

    The data of the days in ``day_vec`` is read from ``basepath`` with
    ``read_data``. The sources to keep are selected according to
    ``filt_type``, the first source of each flash is extracted and the
    duration and enclosing area of each flash are computed. Histograms of
    the selected data are plotted and written to csv files whose names start
    with ``basepath + data_ID``.

    All settings (base path, days, base name, filter type, minimum number of
    sources) are local variables that have to be edited in the code.

    NOTE(review): the branch executed when 'propAG' is a key of the filtered
    polarimetric values contains an unconditional ``return`` after the
    "number of hydrometeors" histograms. When that branch is taken, nothing
    after that statement is executed. Confirm whether this is intentional.
    """
    # basepath = '/data/pyrad_products/rad4alp_hydro_PHA/'
    basepath = '/store/msrad/radar/pyrad_products/rad4alp_hydro_PHA/'
    day_vec = [
        datetime.datetime(2017, 6, 29),
        datetime.datetime(2017, 6, 30),
        datetime.datetime(2017, 7, 10),
        datetime.datetime(2017, 7, 14),
        datetime.datetime(2017, 7, 18),
        datetime.datetime(2017, 7, 19),
        datetime.datetime(2017, 7, 30),
        datetime.datetime(2017, 8, 1)
    ]

    #    day_vec = [
    #        datetime.datetime(2017, 7, 14)]

    basename = 'Santis_data_entropy_CGpn'
    filt_type = 'keep_all'
    nsources_min = 10

    # pol_vals_labels, datatype_vec and step_list are parallel lists: the
    # i-th label is processed with the i-th data type and histogram step
    if 'entropy' in basename:
        pol_vals_labels = [
            'hydro', 'entropy', 'propAG', 'propCR', 'propIH', 'propLR',
            'propMH', 'propRN', 'propRP', 'propVI', 'propWS'
        ]

        datatype_vec = [
            'hydro', 'entropy', 'propAG', 'propCR', 'propIH', 'propLR',
            'propMH', 'propRN', 'propRP', 'propVI', 'propWS'
        ]

        step_list = [None, 0.1, 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    else:
        pol_vals_labels = [
            'hydro [-]', 'KDPc [deg/Km]', 'dBZc [dBZ]', 'RhoHVc [-]',
            'TEMP [deg C]', 'ZDRc [dB]'
        ]

        datatype_vec = ['hydro', 'KDPc', 'dBZc', 'RhoHVc', 'TEMP', 'ZDRc']

        step_list = [None, 0.05, 0.5, 0.001, 1., 0.1]

    # Label of the hydrometeor classification, needed by the phase filters.
    # hydro_label is only defined if a label contains 'hydro' (true for both
    # label lists above)
    for label in pol_vals_labels:
        if 'hydro' in label:
            hydro_label = label
            break

    print("====== Lightning post-processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== Lightning post-processing finished: ")

    # read all the data to analyze
    flashnr, time_data, time_in_flash, lat, lon, alt, dBm, pol_vals_dict = (
        read_data(basepath,
                  day_vec,
                  basename=basename,
                  pol_vals_labels=pol_vals_labels))

    #    flashnr, time_data, time_in_flash, lat, lon, alt, dBm, pol_vals_dict = (
    #        read_data_two_sources(
    #            basepath, day_vec, basename1='Santis_data_entropy',
    #            basename2='Santis_data_entropy_CGt', basename_out='Santis_data_entropy_no_CG',
    #            keep_common=False,
    #            pol_vals_labels=pol_vals_labels))

    # Get indices of data to keep. data_ID is used as prefix of the output
    # file names and subtitl is appended to the plot titles
    if filt_type == 'keep_all':
        ind, data_ID, subtitl = get_indices_all_data(flashnr,
                                                     nsources_min=nsources_min)
    elif filt_type == 'keep_solid':
        ind, data_ID, subtitl = get_indices_solid_phase(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    elif filt_type == 'keep_liquid':
        ind, data_ID, subtitl = get_indices_liquid_phase(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    elif filt_type == 'keep_liquid_origin':
        ind, data_ID, subtitl = get_indices_liquid_phase_origin(
            flashnr, pol_vals_dict[hydro_label], nsources_min=nsources_min)
    else:
        warn('Unknown filter type ' + filt_type)
        return

    # apply the selection to all the per-source variables
    flashnr_filt = flashnr[ind]
    time_data_filt = time_data[ind]
    time_in_flash_filt = time_in_flash[ind]
    lat_filt = lat[ind]
    lon_filt = lon[ind]
    alt_filt = alt[ind]
    dBm_filt = dBm[ind]
    pol_vals_dict_filt = deepcopy(pol_vals_dict)
    for key in pol_vals_dict.keys():
        pol_vals_dict_filt[key] = pol_vals_dict[key][ind]


#    # write the filtered data in a file
#    vals_list = []
#    for label in pol_vals_labels:
#        vals_list.append(pol_vals_dict_filt[label])
#
#    fname = basepath+basename'_'+data_ID+'.csv'
#    write_ts_lightning(
#        flashnr_filt, time_data_filt, time_in_flash_filt, lat_filt, lon_filt,
#        alt_filt, dBm_filt, vals_list, fname, pol_vals_labels)
#    print('written to '+fname)

    # Get flashes origin of filtered data. np.unique returns the index of the
    # first occurrence of each flash number, i.e. the first source stored for
    # each flash
    flashnr_first, ind_first = np.unique(flashnr_filt, return_index=True)
    time_data_first = time_data_filt[ind_first]
    time_in_flash_first = time_in_flash_filt[ind_first]
    lat_first = lat_filt[ind_first]
    lon_first = lon_filt[ind_first]
    alt_first = alt_filt[ind_first]
    dBm_first = dBm_filt[ind_first]
    pol_vals_dict_first = deepcopy(pol_vals_dict_filt)
    for key in pol_vals_dict_filt.keys():
        pol_vals_dict_first[key] = pol_vals_dict_filt[key][ind_first]

    # get duration and area of flash (one value per flash, masked until set)
    duration_filt = np.ma.masked_all(flashnr_first.size)
    area_filt = np.ma.masked_all(flashnr_first.size)

    # NOTE(review): presumably converts the positions to Swiss CH1903
    # coordinates in metres - confirm against wgs84_to_swissCH1903
    chy_filt, chx_filt, _ = wgs84_to_swissCH1903(lon_filt,
                                                 lat_filt,
                                                 alt_filt,
                                                 no_altitude_transform=True)

    for i, flash_ID in enumerate(flashnr_first):
        # duration: time between first and last stored source of the flash,
        # converted from seconds to milliseconds
        time_data_flash = time_data_filt[flashnr_filt == flash_ID]
        duration_filt[i] = (
            1e3 * (time_data_flash[-1] - time_data_flash[0]).total_seconds())

        chy_flash = chy_filt[flashnr_filt == flash_ID]
        chx_flash = chx_filt[flashnr_filt == flash_ID]

        # area: area of the minimum rotated rectangle enclosing the sources
        # of the flash, scaled by 1e-6 (m2 to km2 if coordinates are in m)
        points_flash = shapely.geometry.MultiPoint(
            list(zip(chy_flash, chx_flash)))
        area_filt[i] = points_flash.minimum_rotated_rectangle.area * 1e-6

    print('N flashes: ' + str(flashnr_first.size))
    print('N sources: ' + str(flashnr_filt.size))

    # Analyse the data

    # create histograms of hydrometeor proportions
    if 'propAG' in pol_vals_dict_filt:
        bins_centers = np.arange(0, 10, 1)
        bins_edges = np.arange(-0.5, 10.5, 1)

        # Create histogram of number of different hydrometeors types in each
        # radar range gate. All sources
        nhydros_hist = hist_nhydros_gate(pol_vals_dict_filt, percent_min=10.)

        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_nhydro.png')
        plot_histogram2(bins_centers,
                        nhydros_hist, [fname],
                        labelx='Number of hydrometeors in radar range gate',
                        labely='occurrence',
                        titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_nhydro.csv')
        write_histogram(bins_edges, nhydros_hist, fname)
        print('Written ' + fname)

        # Create histogram of number of different hydrometeors types in each
        # radar range gate. First source
        nhydros_hist = hist_nhydros_gate(pol_vals_dict_first, percent_min=10.)

        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_nhydro.png')
        plot_histogram2(bins_centers,
                        nhydros_hist, [fname],
                        labelx='Number of hydrometeors in radar range gate',
                        labely='occurrence',
                        titl='Trajectory Histogram First Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_nhydro.csv')
        write_histogram(bins_edges, nhydros_hist, fname)
        print('Written ' + fname)

        # NOTE(review): this unconditional return ends main() here. The rest
        # of this branch is unreachable and, when this branch is taken, none
        # of the histograms and position plots below are produced. Confirm
        # whether this is a debugging leftover
        return

        # Create histograms of dominant hydrometeors all sources
        hydro_hist2 = hist_dominant_hydrometeors(pol_vals_dict_filt,
                                                 percent_min=10.)

        fname_hist = basepath + data_ID + '_allsources_ts_trajlightning_hydro_dominant.png'
        fname_hist = _plot_time_range(bins_edges,
                                      bins_edges,
                                      hydro_hist2,
                                      None, [fname_hist],
                                      titl='Trajectory Histogram All Sources' +
                                      subtitl,
                                      xlabel='Dominant hydrometeor',
                                      ylabel='2nd most dominant hydrometeor',
                                      vmin=0,
                                      clabel='Occurrence',
                                      figsize=[10, 8],
                                      dpi=72)
        print('Plotted ' + ' '.join(fname_hist))

        # Create histogram of dominant hydrometeors first sources
        hydro_hist2 = hist_dominant_hydrometeors(pol_vals_dict_first,
                                                 percent_min=10.)

        fname_hist = basepath + data_ID + '_firstsource_ts_trajlightning_hydro_dominant.png'
        fname_hist = _plot_time_range(
            bins_edges,
            bins_edges,
            hydro_hist2,
            None, [fname_hist],
            titl='Trajectory Histogram First Sources' + subtitl,
            xlabel='Dominant hydrometeor',
            ylabel='2nd most dominant hydrometeor',
            vmin=0,
            clabel='Occurrence',
            figsize=[10, 8],
            dpi=72)
        print('Plotted ' + ' '.join(fname_hist))

        # create histogram of percentage of dominant hydrometeor all sources
        hydro_hist = hist_hydrometeor_mixtures(pol_vals_dict_filt)

        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_hydro_prop.png')
        plot_histogram2(bins_centers,
                        hydro_hist, [fname],
                        labelx='radar echo classification (-)',
                        labely='percentage',
                        titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_allsources_ts_trajlightning_hydro_prop.csv')
        write_histogram(bins_edges, hydro_hist, fname)
        print('Written ' + fname)

        # create histogram of percentage of dominant hydrometeor first sources
        hydro_hist = hist_hydrometeor_mixtures(pol_vals_dict_first)

        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_hydro_prop.png')
        plot_histogram2(bins_centers,
                        hydro_hist, [fname],
                        labelx='radar echo classification (-)',
                        labely='percentage',
                        titl='Trajectory Histogram First Sources' + subtitl)

        print("----- plot to '%s'" % fname)

        # store histogram
        fname = (basepath + data_ID +
                 '_firstsource_ts_trajlightning_hydro_prop.csv')
        write_histogram(bins_edges, hydro_hist, fname)
        print('Written ' + fname)

    # Histograms of each polarimetric variable, for all the sources and for
    # the first source of each flash
    for i, key in enumerate(pol_vals_labels):
        step = step_list[i]
        datatype = datatype_vec[i]

        field_name = get_fieldname_pyart(datatype)
        field_dict = get_metadata(field_name)

        labelx = get_colobar_label(field_dict, field_name)

        vals = pol_vals_dict_filt[key]
        bins, values = compute_histogram(vals, field_name, step=step)

        print(datatype + ' min: ' + str(vals.min()))
        print(datatype + ' max: ' + str(vals.max()))

        # Plot all sources histogram
        # NOTE: despite its name, fname_first_source holds the name of the
        # all sources files until the "First sources" section below
        fname_first_source = (basepath + data_ID +
                              '_allsources_ts_trajlightning_' + datatype +
                              '.png')
        plot_histogram(bins,
                       values, [fname_first_source],
                       labelx=labelx,
                       titl='Trajectory Histogram All Sources' + subtitl)

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (basepath + data_ID +
                              '_allsources_ts_trajlightning_' + datatype +
                              '.csv')
        hist_values, _ = np.histogram(values, bins=bins)
        write_histogram(bins, hist_values, fname_first_source)
        print('Written ' + fname_first_source)

        # First sources
        vals = pol_vals_dict_first[key]
        bins, values = compute_histogram(vals, field_name, step=step)

        # Plot first source histogram
        fname_first_source = (basepath + data_ID +
                              '_firstsource_ts_trajlightning_' + datatype +
                              '.png')
        plot_histogram(bins,
                       values, [fname_first_source],
                       labelx=labelx,
                       titl='Trajectory Histogram First Source' + subtitl)

        print("----- plot to '%s'" % fname_first_source)

        # store histogram
        fname_first_source = (basepath + data_ID +
                              '_firstsource_ts_trajlightning_' + datatype +
                              '.csv')

        hist_values_first, _ = np.histogram(values, bins=bins)
        write_histogram(bins, hist_values_first, fname_first_source)
        print('Written ' + fname_first_source)

    # Get histograms of sources altitude and power

    # define histogram bin edges (bin widths: altitude 100, power 1, time 1,
    # area 100, duration 100, in the units shown in the plot labels below)
    bin_edges_alt = np.arange(-50., 14150., 100.)
    bin_edges_dBm = np.arange(-17., 47., 1.)
    bin_edges_time = np.arange(0, 25, 1)
    bin_edges_area = np.arange(0., 2100., 100.)
    bin_edges_duration = np.arange(0., 1100., 100.)

    # In the sections below fname_hist is first the name of the file to
    # create and is then rebound to the value returned by the plotting
    # function (joined as a list of names) or by write_histogram (used as a
    # string)

    # Plot histogram of LMA flash area
    _, area_filt_values = compute_histogram(area_filt,
                                            None,
                                            bin_edges=bin_edges_area)
    fname_hist = basepath + data_ID + '_Santis_hist_area.png'
    fname_hist = plot_histogram(bin_edges_area,
                                area_filt_values, [fname_hist],
                                labelx='Area [km2]',
                                titl='Flash area' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_area.csv'
    hist_area, _ = np.histogram(area_filt_values, bins=bin_edges_area)
    fname_hist = write_histogram(bin_edges_area, hist_area, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram of LMA flash duration
    _, duration_filt_values = compute_histogram(duration_filt,
                                                None,
                                                bin_edges=bin_edges_duration)
    fname_hist = basepath + data_ID + '_Santis_hist_duration.png'
    fname_hist = plot_histogram(bin_edges_duration,
                                duration_filt_values, [fname_hist],
                                labelx='Duration [ms]',
                                titl='Flash duration' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_duration.csv'
    hist_duration, _ = np.histogram(duration_filt_values,
                                    bins=bin_edges_duration)
    fname_hist = write_histogram(bin_edges_duration, hist_duration, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram time of occurrence, based on the first source of each
    # flash
    # NOTE(review): occurrence_time presumably returns the hour of the day
    # of each time stamp - confirm
    time_hour_first = occurrence_time(time_data_first)

    fname_hist = basepath + data_ID + '_Santis_hist_time.png'
    fname_hist = plot_histogram(bin_edges_time,
                                time_hour_first, [fname_hist],
                                labelx='Hour [UTC]',
                                titl='Flash occurrence time' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_time.csv'
    hist_time, _ = np.histogram(time_hour_first, bins=bin_edges_time)
    fname_hist = write_histogram(bin_edges_time, hist_time, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram altitude all sources
    _, alt_filt_values = compute_histogram(alt_filt,
                                           None,
                                           bin_edges=bin_edges_alt)
    fname_hist = basepath + data_ID + '_Santis_hist_alt.png'
    fname_hist = plot_histogram(bin_edges_alt,
                                alt_filt_values, [fname_hist],
                                labelx='Altitude [m MSL]',
                                titl='Flash sources altitude' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_alt.csv'
    hist_alt, _ = np.histogram(alt_filt_values, bins=bin_edges_alt)
    fname_hist = write_histogram(bin_edges_alt, hist_alt, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram altitude first sources
    _, alt_first_values = compute_histogram(alt_first,
                                            None,
                                            bin_edges=bin_edges_alt)
    fname_hist = basepath + data_ID + '_Santis_hist_alt_first_source.png'
    fname_hist = plot_histogram(bin_edges_alt,
                                alt_first_values, [fname_hist],
                                labelx='Altitude [m MSL]',
                                titl='Flash first source altitude' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_alt_first_source.csv'
    hist_alt_fist, _ = np.histogram(alt_first_values, bins=bin_edges_alt)
    fname_hist = write_histogram(bin_edges_alt, hist_alt_fist, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram power all sources
    _, dBm_filt_values = compute_histogram(dBm_filt,
                                           None,
                                           bin_edges=bin_edges_dBm)
    fname_hist = basepath + data_ID + '_Santis_hist_dBm.png'
    fname_hist = plot_histogram(bin_edges_dBm,
                                dBm_filt_values, [fname_hist],
                                labelx='Power [dBm]',
                                titl='Flash sources power' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_dBm.csv'
    hist_dBm, _ = np.histogram(dBm_filt_values, bins=bin_edges_dBm)
    fname_hist = write_histogram(bin_edges_dBm, hist_dBm, fname_hist)
    print('Written ' + fname_hist)

    # Plot histogram power first sources
    _, dBm_first_values = compute_histogram(dBm_first,
                                            None,
                                            bin_edges=bin_edges_dBm)
    fname_hist = basepath + data_ID + '_Santis_hist_dBm_first_source.png'
    fname_hist = plot_histogram(bin_edges_dBm,
                                dBm_first_values, [fname_hist],
                                labelx='Power [dBm]',
                                titl='Flash first source power' + subtitl)
    print('Plotted ' + ' '.join(fname_hist))

    fname_hist = basepath + data_ID + '_Santis_hist_dBm_first_source.csv'
    hist_dBm_first, _ = np.histogram(dBm_first_values, bins=bin_edges_dBm)
    fname_hist = write_histogram(bin_edges_dBm, hist_dBm_first, fname_hist)
    print('Written ' + fname_hist)

    # Plot 2D histogram all sources
    # NOTE(review): np.histogram2d requires both inputs to have the same
    # length. This holds only if compute_histogram kept the same samples of
    # alt_filt and dBm_filt - confirm
    H, _, _ = np.histogram2d(alt_filt_values,
                             dBm_filt_values,
                             bins=[bin_edges_alt, bin_edges_dBm])

    # set 0 values to blank
    H = np.ma.asarray(H)
    H[H == 0] = np.ma.masked

    fname_hist = basepath + data_ID + '_Santis_2Dhist_alt_dBm.png'
    fname_hist = _plot_time_range(bin_edges_alt,
                                  bin_edges_dBm,
                                  H,
                                  None, [fname_hist],
                                  titl='LMA sources Altitude-Power histogram' +
                                  subtitl,
                                  xlabel='Altitude [m MSL]',
                                  ylabel='Power [dBm]',
                                  clabel='Occurrence',
                                  vmin=0,
                                  vmax=None,
                                  figsize=[10, 8],
                                  dpi=72)
    print('Plotted ' + ' '.join(fname_hist))

    # Plot 2D histogram first sources
    H, _, _ = np.histogram2d(alt_first_values,
                             dBm_first_values,
                             bins=[bin_edges_alt, bin_edges_dBm])

    # set 0 values to blank
    H = np.ma.asarray(H)
    H[H == 0] = np.ma.masked

    fname_hist = basepath + data_ID + '_Santis_2Dhist_alt_dBm_first_source.png'
    fname_hist = _plot_time_range(
        bin_edges_alt,
        bin_edges_dBm,
        H,
        None, [fname_hist],
        titl='LMA first sources Altitude-Power histogram' + subtitl,
        xlabel='Altitude [m MSL]',
        ylabel='Power [dBm]',
        clabel='Occurrence',
        vmin=0,
        vmax=None,
        figsize=[10, 8],
        dpi=72)
    print('Plotted ' + ' '.join(fname_hist))

    # plot position all sources, once for each altitude sorting order
    figfname = basepath + data_ID + '_Santis_LMA_sources_pos_max_height_on_top.png'
    figfname = plot_pos(lat_filt,
                        lon_filt,
                        alt_filt, [figfname],
                        sort_altitude='Highest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='Flash sources position. Highest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))

    figfname = basepath + data_ID + '_Santis_LMA_sources_pos_min_height_on_top.png'
    figfname = plot_pos(lat_filt,
                        lon_filt,
                        alt_filt, [figfname],
                        sort_altitude='Lowest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='Flash sources position. Lowest on top' + subtitl)
    print('Plotted ' + ' '.join(figfname))

    # plot position first source, once for each altitude sorting order
    figfname = (basepath + data_ID +
                '_Santis_LMA_first_source_pos_max_height_on_top.png')
    figfname = plot_pos(lat_first,
                        lon_first,
                        alt_first, [figfname],
                        sort_altitude='Highest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='First flash source position. Highest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))

    figfname = (basepath + data_ID +
                '_Santis_LMA_first_source_pos_min_height_on_top.png')
    figfname = plot_pos(lat_first,
                        lon_first,
                        alt_first, [figfname],
                        sort_altitude='Lowest_on_top',
                        cb_label='Source height [m MSL]',
                        titl='First flash source position. Lowest on top' +
                        subtitl)
    print('Plotted ' + ' '.join(figfname))
Example #6
0
def _write_empty_altitude_histogram(bin_edges, step, fname_hist):
    """
    Writes an all-zero altitude histogram so that a cell time step without
    sources still has a histogram file.

    Parameters
    ----------
    bin_edges : array
        Edges of the altitude bins
    step : float
        Altitude bin width, forwarded to write_histogram
    fname_hist : str
        Name of the csv file to write

    """
    fname_hist = write_histogram(
        bin_edges, np.zeros(bin_edges.size-1, dtype=int), fname_hist,
        step=step)
    print('----- save to '+fname_hist)


def _count_sources_in_cell(src_time, src_lat, src_lon, src_alt, cell_times,
                           cell_contour, t_res, bin_edges, step, trtpath,
                           infostr, hist_tag, hist_descr, pos_tag, pos_descr):
    """
    Counts, for each time step of a TRT cell, the lightning sources located
    inside the cell contour and writes the altitude histogram and position
    plots of each time step.

    Parameters
    ----------
    src_time, src_lat, src_lon, src_alt : array
        Time, latitude, longitude and altitude of the sources to analyze
    cell_times : array of datetime
        Time steps of the TRT cell
    cell_contour : sequence
        Cell contour at each time step, passed to belongs_roi_indices
    t_res : float
        Length of the cell time step [s]. Sources in the interval
        (cell_time-t_res, cell_time] are attributed to the time step
    bin_edges : array
        Edges of the altitude bins
    step : float
        Altitude bin width, forwarded to write_histogram
    trtpath : str
        Output folder. Must end with a path separator
    infostr : str
        TRT cell identifier used in file names and titles
    hist_tag : str
        File name suffix (without extension) of the altitude histogram
    hist_descr : str
        Text appended to the altitude histogram title
    pos_tag : str
        File name suffix (with extension) of the position plot
    pos_descr : str
        Text appended to the position plot title

    Returns
    -------
    nsources : array of int
        Number of sources inside the cell at each time step

    """
    nsources = np.zeros(cell_times.size, dtype=int)
    for i, cell_time in enumerate(cell_times):
        cell_time_str = cell_time.strftime("%Y%m%d%H%M%S")
        fbase = trtpath+cell_time_str+'_'+infostr
        fname_csv = fbase+hist_tag+'.csv'

        # Find sources within time step of cell
        tstart_cell_step = cell_time-datetime.timedelta(seconds=t_res)
        inds = np.where(np.logical_and(
            src_time > tstart_cell_step, src_time <= cell_time))[0]
        if inds.size == 0:
            warn('No flashes within time step')
            _write_empty_altitude_histogram(bin_edges, step, fname_csv)
            continue

        lat_cell = src_lat[inds]
        lon_cell = src_lon[inds]
        alt_cell = src_alt[inds]

        # Find sources within cell contour
        inds, is_roi = belongs_roi_indices(
            lat_cell, lon_cell, cell_contour[i])
        if is_roi == 'None':
            warn('No flashes within cell contour')
            _write_empty_altitude_histogram(bin_edges, step, fname_csv)
            continue
        if is_roi == 'All':
            inds = inds[0]

        lat_cell = lat_cell[inds]
        lon_cell = lon_cell[inds]
        alt_cell = alt_cell[inds]

        nsources[i] = lat_cell.size

        # Plot altitude histogram
        titl = cell_time_str+' '+infostr+hist_descr
        fname_png = plot_histogram(
            bin_edges, alt_cell, [fbase+hist_tag+'.png'],
            labelx='Altitude [m MSL]', titl=titl)
        print('Plotted '+' '.join(fname_png))

        # Save altitude histogram. The edges returned by np.histogram are
        # discarded: they are the ones that were passed in
        hist, _ = np.histogram(alt_cell, bins=bin_edges)
        fname_hist = write_histogram(bin_edges, hist, fname_csv, step=step)
        print('----- save to '+fname_hist)

        # Plot position of the sources
        titl = cell_time_str+' '+infostr+pos_descr
        figfname = plot_pos(
            lat_cell, lon_cell, alt_cell, [fbase+pos_tag],
            sort_altitude='Highest_on_top',
            cb_label='Source height [m MSL]', titl=titl)
        print('Plotted '+' '.join(figfname))

    return nsources


def _plot_altitude_time_histogram(trtpath, infostr, csv_tag, fig_tag, titl):
    """
    Plots the time-height histogram of a TRT cell out of the altitude
    histogram files written for each of its time steps.

    Parameters
    ----------
    trtpath : str
        Folder containing the histogram files. Must end with a path
        separator
    infostr : str
        TRT cell identifier used in the file names
    csv_tag : str
        File name suffix (with extension) of the histogram files to read
    fig_tag : str
        File name suffix (with extension) of the figure to create
    titl : str
        Figure title

    """
    flist = glob.glob(trtpath+'*_'+infostr+csv_tag)
    if not flist:
        warn('No histogram files found in '+trtpath +
             ' for TRT cell '+infostr)
        return

    # The altitude edges read back from file get their own name so that
    # they never replace the bin edges used to compute the histograms
    tbin_edges, alt_bin_edges, data_ma, _ = read_histogram_ts(
        flist, 'flash_altitude')

    vmax = np.max(data_ma)
    if vmax == 0.:
        warn('Unable to plot histogram. No valid data')
        return

    data_ma[data_ma == 0.] = np.ma.masked
    fname_hist = trtpath+infostr+fig_tag
    _plot_time_range(
        tbin_edges, alt_bin_edges, data_ma, 'frequency_of_occurrence',
        [fname_hist], titl=titl, ylabel='Altitude [m MSL]',
        vmin=0., vmax=vmax, figsize=[10, 8], dpi=72)

    print("----- plot to '%s'" % fname_hist)


def main():
    """
    Relates Santis LMA lightning data to TRT cells.

    For each TRT cell file found under the requested days the LMA flashes
    with at least nsources_min sources are attributed to the cell time
    steps, first considering only the first source of each flash and then
    all the sources. Altitude histograms, position plots and time series
    are written next to the TRT cell file and two summary csv files
    (cell_LMA_flashes.csv and cell_LMA_sources.csv) are written in the TRT
    base folder.

    """
    # parse the arguments
    parser = argparse.ArgumentParser(
        description='Entry to Pyrad processing framework')

    # positional arguments
    parser.add_argument(
        'days', nargs='+', type=str,
        help='Dates to process. Format YYYY-MM-DD')

    # keyword arguments
    parser.add_argument(
        '--nsources_min', type=int, default=10,
        help='Minimum number of sources to consider the LMA flash valid')

    parser.add_argument(
        '--trtbase', type=str,
        default='/store/msrad/radar/trt/',
        help='name of folder containing the TRT cell data')

    parser.add_argument(
        '--flashpath', type=str,
        default='/store/msrad/lightning/LMA/Santis/',
        help='name of the folder containing the Santis LMA data')

    args = parser.parse_args()

    print("====== LMA data TRT processing started: %s" %
          datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
    atexit.register(_print_end_msg,
                    "====== LMA data TRT processing finished: ")

    print('trt path: '+args.trtbase)

    time_dir_list = args.days

    # Get bins altitude. The edges are centered on multiples of step
    alt_min = 0.
    alt_max = 14000.
    step = 100.
    bin_edges = np.linspace(
        alt_min-step/2., alt_max+step/2, num=int((alt_max-alt_min)/step)+2)

    trt_list = []
    for time_dir in time_dir_list:
        trt_list.extend(glob.glob(
            args.trtbase+time_dir+'/TRTC_cell_plots/All/*.trt'))
        trt_list.extend(glob.glob(
            args.trtbase+time_dir+'/TRTC_cell_plots/Some/*.trt'))

    cell_ID_list = np.ma.asarray([], dtype=int)
    time_list = np.ma.asarray([], dtype=datetime.datetime)
    lon_list = np.ma.asarray([], dtype=float)
    lat_list = np.ma.asarray([], dtype=float)
    flash_density_list = np.ma.asarray([], dtype=float)
    sources_density_list = np.ma.asarray([], dtype=float)
    rank_flash_density_list = np.ma.asarray([], dtype=float)
    area_list = np.ma.asarray([], dtype=float)
    nflash_list = np.ma.asarray([], dtype=int)
    nsources_list = np.ma.asarray([], dtype=int)
    for trt_fname in trt_list:
        print('processing TRT cell file '+trt_fname)
        trtpath = os.path.dirname(trt_fname)+'/'
        infostr = os.path.basename(trt_fname).split('.')[0]

        # reading TRT cell file
        (traj_ID, yyyymmddHHMM, lon_trt, lat_trt, _, _, _, area, _, _, _,
         RANKr, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, cell_contour) = (
             read_trt_traj_data(trt_fname))

        # The length of the time step is derived from consecutive cell
        # times. With fewer than two of them it would be NaN, which
        # datetime.timedelta rejects, so the cell is skipped instead of
        # aborting the whole run
        if np.size(yyyymmddHHMM) < 2:
            warn('Unable to determine the time step of TRT cell '+infostr)
            continue

        # reading lightning file
        dt_str = infostr[0:12]
        dt = datetime.datetime.strptime(dt_str, '%Y%m%d%H%M')

        flashnr, time_data, _, lat, lon, alt, _ = read_lightning(
            args.flashpath+dt.strftime("%y%m%d")+'.txt')

        if flashnr is None:
            continue

        # Filter data with less than nsources_min sources. The kept
        # sources end up grouped by flash number, in their original
        # order within each flash
        ind = []
        for flash in np.unique(flashnr):
            ind_flash = np.where(flashnr == flash)[0]
            if ind_flash.size < args.nsources_min:
                continue
            ind.extend(ind_flash)

        if np.size(ind) == 0:
            continue

        flashnr = flashnr[ind]
        time_data = time_data[ind]
        lat = lat[ind]
        lon = lon[ind]
        alt = alt[ind]

        # Get first sources data (first occurrence of each flash number)
        _, unique_ind = np.unique(flashnr, return_index=True)
        lat_first = lat[unique_ind]
        lon_first = lon[unique_ind]
        alt_first = alt[unique_ind]
        time_first = time_data[unique_ind]

        # Find cell period
        cell_dt_s = np.array(
            [(cell_time-yyyymmddHHMM[0]).total_seconds()
             for cell_time in yyyymmddHHMM], dtype=float)
        t_res = np.mean(np.diff(cell_dt_s))

        # The time series file names carry the time of the last cell step
        last_time_str = yyyymmddHHMM[-1].strftime("%Y%m%d%H%M%S")
        ts_fbase = trtpath+last_time_str+'_'+infostr

        # analyze first sources
        print('\n\n--- Processing first sources ----')
        nflashes = _count_sources_in_cell(
            time_first, lat_first, lon_first, alt_first, yyyymmddHHMM,
            cell_contour, t_res, bin_edges, step, trtpath, infostr,
            '_hist_alt_first_source',
            ' TRT cell. Flash first source altitude',
            '_first_source_pos_max_height_on_top.png',
            ' first flash source position. Highest on top')

        # plot time series lightning density
        figfname = plot_timeseries(
            yyyymmddHHMM, [nflashes/area],
            [ts_fbase+'_dens_first_sources.png'], labelx='Time [UTC]',
            labely='Flash dens [Flashes/Km2]', labels=['Flash density'],
            title=infostr+' First sources density', period=0,
            timeformat=None, colors=None, linestyles=None, markers=None,
            ymin=None, ymax=None, dpi=72)
        print('Plotted '+' '.join(figfname))

        # plot time series lightning
        figfname = plot_timeseries(
            yyyymmddHHMM, [nflashes], [ts_fbase+'_N_first_sources.png'],
            labelx='Time [UTC]', labely='N flashes', labels=['N flashes'],
            title=infostr+' Number of first sources in cell', period=0,
            timeformat=None, colors=None, linestyles=None, markers=None,
            ymin=None, ymax=None, dpi=72)
        print('Plotted '+' '.join(figfname))

        # plot time-hist_height
        _plot_altitude_time_histogram(
            trtpath, infostr, '_hist_alt_first_source.csv',
            '_trt_HISTOGRAM_alt_first_source.png',
            'TRT cell '+infostr+'\n'+'Altitude of first flash source')

        # Append flash data
        cell_ID_list = np.ma.append(cell_ID_list, traj_ID)
        time_list = np.ma.append(time_list, yyyymmddHHMM)
        lon_list = np.ma.append(lon_list, lon_trt)
        lat_list = np.ma.append(lat_list, lat_trt)
        flash_density_list = np.ma.append(flash_density_list, nflashes/area)
        rank_flash_density_list = np.ma.append(
            rank_flash_density_list, RANKr)
        area_list = np.ma.append(area_list, area)
        nflash_list = np.ma.append(nflash_list, nflashes)

        # analyze all sources
        print('\n\n--- Processing all sources ----')
        nsources = _count_sources_in_cell(
            time_data, lat, lon, alt, yyyymmddHHMM, cell_contour, t_res,
            bin_edges, step, trtpath, infostr,
            '_hist_alt_all_sources',
            ' TRT cell. Flash all sources altitude',
            '_all_sources_pos_max_height_on_top.png',
            ' all flash sources position. Highest on top')

        # plot time series lightning
        figfname = plot_timeseries(
            yyyymmddHHMM, [nsources], [ts_fbase+'_N_all_sources.png'],
            labelx='Time [UTC]', labely='N flashes', labels=['N flashes'],
            title=infostr+' Number of sources in cell', period=0,
            timeformat=None, colors=None, linestyles=None, markers=None,
            ymin=None, ymax=None, dpi=72)
        print('Plotted '+' '.join(figfname))

        # plot time series lightning density
        figfname = plot_timeseries(
            yyyymmddHHMM, [nsources/area],
            [ts_fbase+'_dens_all_sources.png'], labelx='Time [UTC]',
            labely='Source dens [Sources/Km2]', labels=['Source density'],
            title=infostr+' Sources density', period=0, timeformat=None,
            colors=None, linestyles=None, markers=None, ymin=None,
            ymax=None, dpi=72)
        print('Plotted '+' '.join(figfname))

        # plot time-hist_height
        _plot_altitude_time_histogram(
            trtpath, infostr, '_hist_alt_all_sources.csv',
            '_trt_HISTOGRAM_alt_all_source.png',
            'TRT cell '+infostr+'\n'+'Altitude of all flash sources')

        # Append sources data
        sources_density_list = np.ma.append(
            sources_density_list, nsources/area)
        nsources_list = np.ma.append(nsources_list, nsources)

    fname = args.trtbase+'cell_LMA_flashes.csv'
    write_trt_cell_lightning(
        cell_ID_list, time_list, lon_list, lat_list, area_list,
        rank_flash_density_list, nflash_list, flash_density_list, fname)

    fname = args.trtbase+'cell_LMA_sources.csv'
    write_trt_cell_lightning(
        cell_ID_list, time_list, lon_list, lat_list, area_list,
        rank_flash_density_list, nsources_list, sources_density_list, fname)