Example #1
def test_pipeline_h5_dat_separated():
    print('\n===== test_pipeline_h5_dat_separated: BEGIN =====')

    # If there is an old ALT_DAT_DIR, recreate it empty.
    rmtree(ALT_DAT_DIR, ignore_errors=True)
    os.mkdir(ALT_DAT_DIR)

    # Copy the .dat files to ALT_DAT_DIR (shutil.copy is portable,
    # unlike shelling out to cp).
    for path_dat in DAT_LIST_ONS:
        copy(path_dat, ALT_DAT_DIR)

    # Make the dat list relative to ALT_DAT_DIR.
    with open(PATH_DAT_LIST_FILE, 'w') as fh:
        for path_dat in ALT_DAT_LIST_ONS:
            fh.write('{}\n'.format(path_dat))

    # Make the h5 list.
    with open(PATH_H5_LIST_FILE, 'w') as fh:
        for path_h5 in H5_LIST_ONS:
            fh.write('{}\n'.format(path_h5))

    # With the list of separated .dat and .h5 files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   PATH_H5_LIST_FILE,
                                   filter_threshold=3,
                                   number_in_cadence=3,
                                   user_validation=False,
                                   saving=True,
                                   on_source_complex_cadence='VOYAGER-1',
                                   csv_name=PATH_CSVF)

    # df_event should not be None.
    if df_event is None:
        raise ValueError(
            'test_pipeline_h5_dat_separated: returned pandas df is None!')

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError(
            'test_pipeline_h5_dat_separated: No CSV of events created')

    # An event CSV was created.
    # Validate the hit table file.
    utl.validate_hittbl(df_event, PATH_CSVF,
                        'test_pipeline_h5_dat_separated', N_EVENTS)

    # Plots!
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec=3,
                        user_validation=False)

    print('\n===== test_pipeline_h5_dat_separated: END =====')
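
Note: the test above is an excerpt and relies on module-level imports and constants that the page does not show. Below is a minimal sketch of that setup; every path, glob pattern, and the N_EVENTS value is a placeholder assumption, not the real test configuration.

# Hypothetical module-level setup for the excerpt above; all paths,
# list contents, and N_EVENTS are placeholders.
import glob
import os
from pathlib import Path
from shutil import copy, rmtree

from turbo_seti.find_event.find_event_pipeline import find_event_pipeline
from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline

TESTDIR = '/tmp/test_pipelines/'
ALT_DAT_DIR = TESTDIR + 'alt_dat/'
PATH_DAT_LIST_FILE = TESTDIR + 'dat_files.lst'
PATH_H5_LIST_FILE = TESTDIR + 'h5_files.lst'
PATH_CSVF = TESTDIR + 'found_event_table.csv'
N_EVENTS = 2  # expected number of events (placeholder)

# ON-source cadence files (assumed naming and layout).
DAT_LIST_ONS = sorted(glob.glob(TESTDIR + '*.dat'))
H5_LIST_ONS = sorted(glob.glob(TESTDIR + '*.h5'))
ALT_DAT_LIST_ONS = [ALT_DAT_DIR + os.path.basename(p) for p in DAT_LIST_ONS]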
Example #2
def main():
    print("\nExecuting program...")
    start = time.time()
    # parse any command line arguments
    cmd_args = parse_args()
    fildir = cmd_args["fildir"]
    csvf = cmd_args["csvf"]
    csvpath = fildir + csvf
    os.chdir(csvpath)
    lst_fil = csvpath + "fil_files.lst"
    csv_list = sorted(glob.glob('*.csv'))
    # move the h5 files to the dat directory
    h5_indir = find_input_data(fildir, '.h5')
    h5_datdir = find_input_data(csvpath, '.h5')
    if not h5_datdir:
        if h5_indir:
            for file in h5_indir:
                h5_file_path = fildir + os.path.basename(file)
                mvh5_in = 'mv ' + h5_file_path + " " + csvpath
                call([mvh5_in], shell=True)
        else:
            fil_list = find_input_data(fildir, '.fil')
            for fil in fil_list:
                bp.fil2h5.make_h5_file(fil, out_dir=csvpath)

    if csv_list:
        for item in csv_list:
            csv_file = item  # csv string in plot_event_pipeline has to be filename only, not entire filepath.
            print("\nRunning plot_event_pipeline...")
            plot_event_pipeline(csv_file, lst_fil)
            plot_dir = csvpath + os.path.splitext(item)[0].split(
                '.')[0] + '_plots/'
            if not os.path.isdir(plot_dir):
                os.mkdir(plot_dir)
            mv_plots = 'mv ' + fildir + os.path.splitext(item)[0].split(
                '.')[0].split('_')[2] + '*.p?? ' + plot_dir
            call([mv_plots], shell=True)
    else:
        print(f'\nThere is no CSV file in {csvpath}')

    # move the h5 files back to the fil directory to save space
    h5_done = find_input_data(csvpath, '.h5')
    for h5 in h5_done:
        mvh5_out = 'mv ' + csvpath + os.path.basename(h5) + " " + fildir
        call([mvh5_out], shell=True)

    end, time_label = get_elapsed_time(start)
    print(f"\nTotal time to execute this program: %.2f {time_label}.\n" % end)
    return None
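
find_input_data() is defined elsewhere in this program. A plausible minimal implementation, assuming it simply globs a directory for one file extension and returns the sorted matches:

import glob
import os

def find_input_data(directory, extension):
    """Hypothetical sketch: return a sorted list of the files in
    `directory` whose names end with `extension` (e.g. '.h5')."""
    return sorted(glob.glob(os.path.join(directory, '*' + extension)))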
Example #3
def find_plot_pipelines(need_init=True, filter_threshold=FILTER_THRESHOLD):
    r'''
    Exercise find_event_pipeline() and plot_event_pipeline()
    '''

    main_time_start = time()

    # If configured to do so, initialize temp directory
    # and fetch all of the HDF5 files from the Internet.
    if need_init:
        initialize()
        for filename_h5 in H5_FILE_LIST:
            wgetter(filename_h5)
        # Make all of the DAT files.
        make_all_dat_files()

    print('find_plot_pipelines: Filter threshold = ', filter_threshold)
    number_in_cadence = len(H5_FILE_LIST)
    print('find_plot_pipelines: Cadence length = ', number_in_cadence)
    print('find_plot_pipelines: find_event_pipeline({}) ...'.format(
        PATH_DAT_LIST_FILE))

    # With the list of DAT files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   filter_threshold=filter_threshold,
                                   number_in_cadence=number_in_cadence,
                                   user_validation=False,
                                   saving=True,
                                   csv_name=PATH_CSVF)

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError('find_plot_pipelines: No CSV of events created')

    # An event CSV was created.
    # Validate the hit table file.
    utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines', N_EVENTS)

    # Make a list of the HDF5 files.
    print(
        'find_plot_pipelines: making a list of HDF5 files in ({}) ...'.format(
            PATH_H5_LIST_FILE))
    with open(PATH_H5_LIST_FILE, 'w') as file_handle:
        for filename_h5 in H5_FILE_LIST:
            file_handle.write('{}\n'.format(TESTDIR + filename_h5))

    # Do the plots for all of the HDF5/DAT file pairs.
    print('find_plot_pipelines: plot_event_pipeline({}, {}) ...'.format(
        PATH_CSVF, PATH_H5_LIST_FILE))
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        user_validation=False)

    # Check that the right number of PNG files were created.
    outdir_list = listdir(TESTDIR)
    npngs = 0
    for cur_file in outdir_list:
        if cur_file.split('.')[-1] == 'png':
            if imghdr.what(TESTDIR + cur_file) != 'png':
                raise ValueError(
                    'find_plot_pipelines: File {} is not a PNG file'.format(
                        cur_file))
            npngs += 1
    if npngs != N_EVENTS:
        raise ValueError(
            'find_plot_pipelines: Expected to find {} PNG files but observed {}'
            .format(N_EVENTS, npngs))

    # Stop the clock - we're done.
    main_time_stop = time()

    print('find_plot_pipelines: End, et = {:.1f} seconds'.format(
        main_time_stop - main_time_start))
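
make_all_dat_files() produces one .dat hit table per downloaded HDF5 file and records the pathnames in PATH_DAT_LIST_FILE. A minimal sketch using turbo_seti's FindDoppler; the max_drift and snr values here are assumptions, not the test's real settings:

from turbo_seti.find_doppler.find_doppler import FindDoppler

def make_all_dat_files():
    """Hypothetical sketch: search each HDF5 file for Doppler-drifting
    hits and list the resulting .dat files, one pathname per line."""
    with open(PATH_DAT_LIST_FILE, 'w') as fh:
        for filename_h5 in H5_FILE_LIST:
            fd = FindDoppler(TESTDIR + filename_h5, max_drift=4, snr=10,
                             out_dir=TESTDIR)
            fd.search()
            fh.write(TESTDIR + filename_h5.replace('.h5', '.dat') + '\n')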
Example #4
def find_plot_pipelines(filter_threshold=3,
                        on_off_first='ON',
                        plot_dir=None,
                        on_source_complex_cadence=False):
    r'''
    Exercise find_event_pipeline() and plot_event_pipeline()
    '''

    main_time_start = time()

    print('find_plot_pipelines_2: Filter threshold = ', filter_threshold)
    h5_file_list = sorted(glob.glob(TESTDIR + 'single*.h5'))
    dat_file_list = sorted(glob.glob(TESTDIR + 'single*.dat'))
    number_in_cadence = len(h5_file_list)
    if number_in_cadence != 6:
        raise ValueError(
            'find_plot_pipelines_2: Expected to find 6 h5 files but observed {}'
            .format(number_in_cadence))
    number_in_cadence = len(dat_file_list)
    if number_in_cadence != 6:
        raise ValueError(
            'find_plot_pipelines_2: Expected to find 6 dat files but observed {}'
            .format(number_in_cadence))

    # Re-order the H5 and DAT files into OFF-ON-...
    # In the 2 lists, swap elements 1 and 2, 3 and 4, 5 and 6.
    for ix in [0, 2, 4]:
        h5_file_list[ix], h5_file_list[ix + 1] = \
            h5_file_list[ix + 1], h5_file_list[ix]
        dat_file_list[ix], dat_file_list[ix + 1] = \
            dat_file_list[ix + 1], dat_file_list[ix]
    with open(PATH_H5_LIST_FILE, 'w') as fh_h5, \
            open(PATH_DAT_LIST_FILE, 'w') as fh_dat:
        for ix in range(6):
            fh_h5.write(h5_file_list[ix] + '\n')
            fh_dat.write(dat_file_list[ix] + '\n')
    print('find_plot_pipelines_2: H5/dat cadence length = ', number_in_cadence)

    # If CSV exists from a previous execution, remove it.
    try:
        remove(PATH_CSVF)
    except OSError:
        pass

    # With the list of DAT files, do find_event_pipeline()
    print('===== find_event_pipeline BEGIN =====')
    df_event = find_event_pipeline(
        PATH_DAT_LIST_FILE,
        sortby_tstart=False,
        filter_threshold=filter_threshold,
        number_in_cadence=number_in_cadence,
        user_validation=False,
        saving=True,
        on_off_first=on_off_first,
        on_source_complex_cadence=on_source_complex_cadence,
        csv_name=PATH_CSVF)
    print('===== find_event_pipeline END =====')

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError('find_plot_pipelines_2: No CSV of events created')

    # An event CSV was created.
    # Validate the hit table file.
    utl.validate_hittbl(df_event, PATH_CSVF, 'test_pipe_lines_2')

    # Do the plots for all of the HDF5/DAT file pairs.
    print(
        '===== plot_event_pipeline #1 (plot_dir does not yet exist) BEGIN ====='
    )
    rmtree(plot_dir, ignore_errors=True)
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        plot_dir=plot_dir,
                        user_validation=False)
    print('===== plot_event_pipeline #2 (plot_dir already exists) BEGIN =====')
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        plot_dir=plot_dir,
                        user_validation=False)

    # Check that the right number of PNG files were created.
    print('===== plot_event_pipeline END =====')
    outdir_list = listdir(plot_dir)
    npngs = 0
    for cur_file in outdir_list:
        if cur_file.split('.')[-1] == 'png':
            if imghdr.what(plot_dir + cur_file) != 'png':
                raise ValueError(
                    'find_plot_pipelines_2: File {} is not a PNG file'.format(
                        cur_file))
            npngs += 1
    if npngs != 6:
        raise ValueError(
            'find_plot_pipelines_2: Expected to find 6 PNG files but observed {}'
            .format(npngs))

    # Stop the clock - we're done.
    main_time_stop = time()

    print('find_plot_pipelines_2: End, et = {:.1f} seconds'.format(
        main_time_stop - main_time_start))
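
Both find_plot_pipelines variants above validate PNG output with imghdr.what(). The imghdr module was deprecated by PEP 594 and removed in Python 3.13; a minimal drop-in check that reads the PNG signature directly (a sketch, not part of the original tests):

def is_png(path):
    """Return True if the file at `path` starts with the 8-byte PNG
    signature. Stands in for imghdr.what(path) == 'png', since imghdr
    was removed in Python 3.13."""
    with open(path, 'rb') as fh:
        return fh.read(8) == b'\x89PNG\r\n\x1a\n'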
Example #5
def execute_pipelines(args):
    r"""
    Interface to the pipeline functions, called by main().

    Parameters
    ----------
    args : argparse.Namespace
    """

    # Set up some parameter values for find_event_pipeline().
    if args.cadence == "complex":
        complex_cadence = True
        if len(args.source_name) < 1:
            print(
                "\n*** plotSETI: Complex cadence requires a source_name.  Bye-bye."
            )
            sys.exit(86)

    else:
        complex_cadence = False
        if args.cadence == "on":
            first_file = "ON"
        else:
            first_file = "OFF"
    from_dir = os.path.abspath(args.input_dir_path) + "/"
    dest_dir = os.path.abspath(args.out_dir) + "/"
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)

    # Establish output pathnames.
    path_h5_list = dest_dir + NAME_H5_LIST
    path_dat_list = dest_dir + NAME_DAT_LIST
    path_csvf = dest_dir + NAME_CSVF
    clean_event_stuff(dest_dir)
    make_lists(from_dir, path_h5_list, path_dat_list)

    # Run find_event_pipeline()
    with open(path_h5_list) as fh:
        number_in_cadence = len(fh.readlines())
    if complex_cadence:
        df_check = find_event_pipeline(
            path_dat_list,
            filter_threshold=args.filt_thresh,
            number_in_cadence=number_in_cadence,
            on_source_complex_cadence=args.source_name,
            sortby_tstart=True,
            check_zero_drift=False,
            SNR_cut=args.snr_threshold,
            user_validation=False,
            csv_name=path_csvf,
            saving=True)
    else:  # not a complex cadence
        df_check = find_event_pipeline(path_dat_list,
                                       filter_threshold=args.filt_thresh,
                                       number_in_cadence=number_in_cadence,
                                       on_source_complex_cadence=False,
                                       on_off_first=first_file,
                                       sortby_tstart=True,
                                       check_zero_drift=False,
                                       SNR_cut=args.snr_threshold,
                                       user_validation=False,
                                       csv_name=path_csvf,
                                       saving=True)

    if df_check is None:
        print(
            "\n*** plotSETI: No events produced in find_event_pipeline()!  Bye-bye."
        )
        return 86

    # Make the plots for all of the HDF5/DAT file pairs in batch mode.
    matplotlib.use("agg", force=True)
    plot_event_pipeline(path_csvf,
                        path_h5_list,
                        plot_dir=dest_dir,
                        filter_spec=args.filt_thresh,
                        offset=0,
                        user_validation=False)

    print("\nplotSETI: Plots are stored in directory {}.  Bye-bye.".format(
        dest_dir))

    return 0
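
make_lists() and clean_event_stuff() come from the surrounding plotSETI module. Below is a sketch of the list-building step, assuming it writes the sorted .h5 and .dat pathnames found in from_dir, one per line (signature per the call above; the variant in Example #9 differs):

import glob

def make_lists(from_dir, path_h5_list, path_dat_list):
    """Hypothetical sketch: write sorted .h5 and .dat pathnames from
    from_dir into the two list files, one pathname per line."""
    for pattern, list_path in ((from_dir + '*.h5', path_h5_list),
                               (from_dir + '*.dat', path_dat_list)):
        with open(list_path, 'w') as fh:
            for pathname in sorted(glob.glob(pattern)):
                fh.write(pathname + '\n')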
Example #6
def find_plot_pipelines(need_init=True, filter_threshold=2):
    '''
    Exercise find_event_pipeline() and plot_event_pipeline()
    '''

    main_time_start = time()

    # If configured to do so, initialize the temp directory
    # and fetch all of the HDF5 files from the Internet.
    if need_init:
        initialize()
        for filename_h5 in H5_FILE_LIST:
            wgetter(filename_h5)

    # Make all of the DAT files.
    make_all_dat_files()

    # Create the CSV file used by plot_event_pipeline.
    print('find_plot_pipelines: Filter threshold = ', filter_threshold)
    number_in_cadence = len(H5_FILE_LIST)
    print('find_plot_pipelines: Cadence length = ', number_in_cadence)
    print('find_plot_pipelines: find_event_pipeline({}) ...'.format(
        PATH_DAT_LIST_FILE))

    # If CSV exists from a previous execution, remove it.
    try:
        remove(PATH_CSVF)
    except OSError:
        pass

    # With the list of DAT files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   filter_threshold=filter_threshold,
                                   number_in_cadence=number_in_cadence,
                                   user_validation=False,
                                   saving=True,
                                   csv_name=PATH_CSVF)

    # CSV created?
    if not Path(PATH_CSVF).exists():
        print('*** find_plot_pipelines: No CSV of events created')
        return

    # An event CSV was created.
    print('find_plot_pipelines: find_event_pipeline() returned dataframe:\n{}'.
          format(df_event))

    # Make a list of the HDF5 files.
    print(
        'find_plot_pipelines: making a list of HDF5 files in ({}) ...'.format(
            PATH_H5_LIST_FILE))
    with open(PATH_H5_LIST_FILE, 'w') as file_handle:
        for filename_h5 in H5_FILE_LIST:
            file_handle.write('{}\n'.format(TESTDIR + filename_h5))

    # Do the plots for all of the HDF5/DAT file pairs.
    print('find_plot_pipelines: plot_event_pipeline({}, {}) ...'.format(
        PATH_CSVF, PATH_H5_LIST_FILE))
    plot_event_pipeline(PATH_CSVF, PATH_H5_LIST_FILE, user_validation=False)

    main_time_stop = time()

    print('find_plot_pipelines: End, et = {:.1f} seconds'.format(
        main_time_stop - main_time_start))
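
wgetter() is not shown; it fetches one HDF5 file from the Internet into the test directory. A minimal sketch using urllib — the base URL is a placeholder, not the real data host, and TESTDIR is the module-level test directory assumed throughout:

import urllib.request

# Placeholder URL; the real tests download Voyager-1 cadence data
# from a project-specific host.
URL_BASE = 'https://example.org/voyager_data/'

def wgetter(filename_h5):
    """Hypothetical sketch: download one HDF5 file into TESTDIR."""
    url = URL_BASE + filename_h5
    print('wgetter: fetching {} ...'.format(url))
    urllib.request.urlretrieve(url, TESTDIR + filename_h5)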
Example #7
def main():
    print("\nExecuting program...")
    start = time.time()
    # parse any command line arguments
    cmd_args = parse_args()
    if not cmd_args["par"]:
        # get the input variables from the command arguments if no input variables file listed
        indir = cmd_args["indir"]
        clobber = cmd_args["clobber"]
        max_drift_factor = cmd_args["MaxDF"]
        min_drift_factor = cmd_args["MinDF"]
        min_SNR = cmd_args["SNR1"]
        gpu_backend = cmd_args["GPU"]
        filter_threshold = cmd_args["filter_threshold"]
        number_in_cadence = cmd_args["number_in_cadence"]
        SNR = cmd_args["SNR2"]
        outdir = cmd_args["out"]
        target_name = str(cmd_args["t"])[2:-2]  # strip the ['...'] wrapper from the list repr
    else:  # still trying to get the input file working
        parameters = (cmd_args["par"])
        my_little_pickle(parameters)
        # dict.update() returns None, so capture the module dict first.
        v = importlib.import_module(parameters).__dict__
        globals().update(v)
        my_little_pickle(v)
    # Make sure the data directory is set correctly or else exit program
    if not os.path.isdir(indir):
        print("\n Specified directory does not exist. Exiting... \n")
        sys.exit()
    # deal with GPU stuff and set output directory either way
    if gpu_backend:
        import cupy
        outdir = indir + outdir + "_gpu/"
    else:
        outdir = indir + outdir
    # make the "processed" directory if it doesn't already exist
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    # get appropriate list of .fil files in directory to work on
    fil_list = find_input_data(indir, '.fil')
    # write .fil files to .lst file for plot_event_pipeline later
    lst_fil = outdir + "fil_files.lst"
    # purge any existing fil_files.lst before creating the new one
    if os.path.isfile(lst_fil):
        os.remove(lst_fil)
        print(
            "%s has been detected and purged to make way for the new list of '.fil' files."
            % lst_fil)
    with open(lst_fil, 'w') as f:
        for item in fil_list:
            f.write("%s\n" % item)
    assert os.path.isfile(lst_fil)

    # loop over input files, reduce to .h5 and comb for hits into .dat file
    print("\nPlease wait ...")
    drifts = []
    fil_array = []
    start_dop = time.time()
    for file_number, fil in enumerate(fil_list, start=1):
        # convert filterbank file to HDF5
        print(f"\nWorking on file number {file_number} in the list...")
        outfile = convert_to_h5(fil, indir=indir)
        # Get max drift rate
        print("\nGetting max drift rate based on target coordinates...\n")
        drift_rate = get_drift_rate(outfile)
        drifts.append(drift_rate)
        fil_array.append(get_file_id(fil))
        print(f"\nBarycentric drift rate (pre factor) is {drift_rate:.2f}")
        max_drift_rate = drift_rate * max_drift_factor
        min_drift_rate = drift_rate * min_drift_factor  # currently broken in find_doppler so doing workaround
        print(
            f"Using a drift rate range between {min_drift_rate:.2f} and {max_drift_rate:.2f}."
        )
        # move h5 file to output folder temporarily
        mv_h5_out = 'mv ' + os.path.basename(outfile) + " " + outdir
        call([mv_h5_out], shell=True)
        # call FindDoppler
        datfile = run_turbo_seti(outfile,
                                 min_snr=min_SNR,
                                 outdir=outdir,
                                 clobber=clobber,
                                 max_drift=max_drift_rate,
                                 gpu_backend=gpu_backend)
        # move h5 file back to input folder
        mv_h5_in = 'mv ' + outdir + os.path.basename(outfile) + " " + indir
        call([mv_h5_in], shell=True)
        # apply minimum drift rate filter to dat files if indicated
        if min_drift_rate:
            cull_dats(datfile=datfile, min_drift=min_drift_rate)
            print('\nCulling complete.')
        else:
            min_drift_factor = 0

    max_max_drift_rate = max(drifts) * max_drift_factor
    min_min_drift_rate = min(drifts) * min_drift_factor
    print(
        f'\nAll hits within drift rate range of {min_min_drift_rate:.4f} and {max_max_drift_rate:.4f} Hz/s.'
    )

    formatted_drifts = ['%.4f' % elem for elem in drifts]
    print(f"\nBarycentric drift rates (pre factor) are {formatted_drifts}")
    print(f"\nThese correspond to the files with file ids: {fil_array}")

    end_dop, time_label_dop = get_elapsed_time(start_dop)
    print(f"\nTotal time to populate dats with hits using find_doppler: "
          f"{end_dop:.2f} {time_label_dop}.\n")

    # remove old dat lists even if clobber off
    print("\nRemoving old dat lists...\n")
    old_files = glob.glob(outdir + "dat*.lst")
    if not old_files:
        print("There are no old .lst files to purge. Moving on...")
    else:
        for file in old_files:
            os.remove(file)
            print("%s has been removed successfully" % file)
    # get appropriate list of .dat files in directory
    dat_list = find_input_data(outdir, '.dat')

    # call run_find_event_pipeline and make csv file to be placed in the processed folder
    os.chdir(outdir)  # change into output dir

    if target_name:
        number_in_cadence = len(dat_list)
    else:
        target_name = False

    csv_file = run_find_event_pipeline(datdir=outdir,
                                       SNR=SNR,
                                       filter_threshold=filter_threshold,
                                       number_in_cadence=number_in_cadence,
                                       on_source_complex_cadence=target_name)

    # now do the plotting
    # cross-reference the csv file above with the fil_files.lst
    if csv_file:
        print("\nRunning plot_event_pipeline...")
        plot_event_pipeline(csv_file, lst_fil)
        plot_dir = outdir + "f" + str(filter_threshold) + '_s' + str(
            int(SNR)) + '_plots/'
        if not os.path.isdir(plot_dir):
            os.mkdir(plot_dir)
        command = 'mv ' + indir + 'f' + str(
            filter_threshold) + '*.p?? ' + plot_dir
        call([command], shell=True)

    print(f"\nBarycentric drift rates (pre factor) are {formatted_drifts}")
    print(f"\nThese correspond to the files with file ids: {fil_array}")

    end, time_label = get_elapsed_time(start)
    print(f"\nTotal time to execute this program: %.2f {time_label}.\n" % end)
    return None
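
get_elapsed_time() (used here and in Example #2) returns an (elapsed, unit_label) pair that the callers format with two decimals. A plausible sketch consistent with that usage; the unit breakpoints are assumptions:

import time

def get_elapsed_time(start):
    """Hypothetical sketch: return (elapsed, label), scaling raw seconds
    to minutes or hours so the printed figure stays small."""
    elapsed = time.time() - start
    for label, divisor in (('hours', 3600.0), ('minutes', 60.0)):
        if elapsed >= divisor:
            return elapsed / divisor, label
    return elapsed, 'seconds'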
Example #8
def find_plot_pipelines(need_init=True, filter_threshold=3):
    r'''
    Exercise find_event_pipeline() and plot_event_pipeline()
    '''

    main_time_start = time()

    # If configured to do so, initialize the temp directory
    # and fetch all of the HDF5 files from the Internet.
    if need_init:
        initialize()
        for filename_h5 in H5_FILE_LIST:
            wgetter(filename_h5)

    # Make all of the DAT files.
    make_all_dat_files()

    # Create the CSV file used by plot_event_pipeline.
    print('find_plot_pipelines: Filter threshold = ', filter_threshold)
    number_in_cadence = len(H5_FILE_LIST)
    print('find_plot_pipelines: Cadence length = ', number_in_cadence)
    print('find_plot_pipelines: find_event_pipeline({}) ...'.format(
        PATH_DAT_LIST_FILE))

    # If CSV exists from a previous execution, remove it.
    try:
        remove(PATH_CSVF)
    except OSError:
        pass

    # With the list of DAT files, do find_event_pipeline()
    df_event = find_event_pipeline(PATH_DAT_LIST_FILE,
                                   filter_threshold=filter_threshold,
                                   number_in_cadence=number_in_cadence,
                                   user_validation=False,
                                   saving=True,
                                   csv_name=PATH_CSVF)

    # CSV file created?
    if not Path(PATH_CSVF).exists():
        raise ValueError('find_plot_pipelines: No CSV of events created')

    # An event CSV was created.
    # Validate CSV file.
    df_validate = validate_csvf(PATH_CSVF)
    if not df_validate.equals(df_event):
        raise ValueError(
            'find_plot_pipelines: Expected df_validate to be identical to df_event but observed\n{}'
            .format(df_validate))

    # Make a list of the HDF5 files.
    print(
        'find_plot_pipelines: making a list of HDF5 files in ({}) ...'.format(
            PATH_H5_LIST_FILE))
    with open(PATH_H5_LIST_FILE, 'w') as file_handle:
        for filename_h5 in H5_FILE_LIST:
            file_handle.write('{}\n'.format(TESTDIR + filename_h5))

    # Do the plots for all of the HDF5/DAT file pairs.
    print('find_plot_pipelines: plot_event_pipeline({}, {}) ...'.format(
        PATH_CSVF, PATH_H5_LIST_FILE))
    plot_event_pipeline(PATH_CSVF,
                        PATH_H5_LIST_FILE,
                        filter_spec='f{}'.format(filter_threshold),
                        user_validation=False)

    # Check that the right number of PNG files were created.
    outdir_list = listdir(TESTDIR)
    npngs = 0
    for cur_file in outdir_list:
        if cur_file.split('.')[-1] == 'png':
            if imghdr.what(TESTDIR + cur_file) != 'png':
                raise ValueError(
                    'find_plot_pipelines: File {} is not a PNG file'.format(
                        cur_file))
            npngs += 1
    if npngs != 6:
        raise ValueError(
            'find_plot_pipelines: Expected to find 6 PNG files but observed {}'
            .format(npngs))

    # Stop the clock - we're done.
    main_time_stop = time()

    print('find_plot_pipelines: End, et = {:.1f} seconds'.format(
        main_time_stop - main_time_start))
Example #9
def execute_pipelines(args):
    """
    Interface to the pipeline functions, called by main().

    Parameters
    ----------
    args : argparse.Namespace
    """

    # Set up some parameter values for find_event_pipeline().
    if args.cadence == "complex":
        complex_cadence = True
        if len(args.source_name) < 1:
            print("\n*** plotSETI: Complex cadence requires a source_name.")
            sys.exit(RETURN_ERROR)

    else:
        complex_cadence = False
        if args.cadence == "on":
            first_file = "ON"
        else:
            first_file = "OFF"
    h5_dir = os.path.abspath(args.h5_dir) + "/"
    dat_dir = os.path.abspath(args.dat_dir) + "/"
    out_dir = os.path.abspath(args.out_dir) + "/"
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    if args.plot_offset:
        offset = "auto"
    else:
        offset = 0

    # Establish output pathnames.
    path_csvf = out_dir + NAME_CSVF
    clean_event_stuff(out_dir)

    # Make the h5 and dat lists.
    # Default to auto-generation?
    if args.h5dat_lists is None:
        SZ_user_list = 0
    else:
        SZ_user_list = len(args.h5dat_lists)
    if args.debug:
        print(f"DEBUG h5dats_list: #{SZ_user_list} {args.h5dat_lists}")
    if SZ_user_list == 0: # Default to auto-generation.
        path_h5_list = out_dir + NAME_H5_LIST
        path_dat_list = out_dir + NAME_DAT_LIST
        number_in_cadence = make_lists(h5_dir, path_h5_list, dat_dir, path_dat_list)
        if number_in_cadence == 0:
            return RETURN_ERROR
    else: # User-specified lists
        if SZ_user_list != 2:
            print(f"\n*** plotSETI: h5dat_lists had {SZ_user_list} elements; must be 2 (one for h5 and one for dat)!")
            return RETURN_ERROR
        if args.h5dat_lists[0] is None or args.h5dat_lists[1] is None:
            print(f"\n*** plotSETI: h5dat_lists had {SZ_user_list} elements; must be 2 (one for h5 and one for dat)!")
            return RETURN_ERROR
        # Check the list of h5 files.
        path_h5_list = args.h5dat_lists[0]
        if not os.path.exists(path_h5_list):
            print(f"\n*** plotSETI: File {path_h5_list} does not exist!")
            return RETURN_ERROR
        N_h5 = count_text_lines(path_h5_list)
        print(f"plotSETI: Found {N_h5} h5 files.")
        # Check the list of dat files.
        path_dat_list = args.h5dat_lists[1]
        if not os.path.exists(path_dat_list):
            print(f"\n*** plotSETI: File {path_dat_list} does not exist!")
            return RETURN_ERROR
        N_dat = count_text_lines(path_dat_list)
        print(f"plotSETI: Found {N_dat} dat files.")
        # Make sure that the lists are of the same size.
        if N_h5 != N_dat:
            print("\n*** plotSETI: Count of dat files must = count of h5 files!")
            return RETURN_ERROR
        number_in_cadence = N_h5

    # Run find_event_pipeline()
    if complex_cadence:
        df_check = find_event_pipeline(path_dat_list,
                            path_h5_list,
                            filter_threshold = args.filter_threshold,
                            number_in_cadence = number_in_cadence,
                            on_source_complex_cadence=args.source_name,
                            sortby_tstart=True,
                            check_zero_drift=False,
                            SNR_cut=args.snr_threshold,
                            min_drift_rate=args.min_drift_rate,
                            max_drift_rate=args.max_drift_rate,
                            user_validation=False,
                            csv_name=path_csvf,
                            saving=True)
    else: # not a complex cadence
        df_check = find_event_pipeline(path_dat_list,
                            path_h5_list,
                            filter_threshold = args.filter_threshold,
                            number_in_cadence = number_in_cadence,
                            on_source_complex_cadence=False,
                            on_off_first=first_file,
                            sortby_tstart=True,
                            check_zero_drift=False,
                            SNR_cut=args.snr_threshold,
                            min_drift_rate=args.min_drift_rate,
                            max_drift_rate=args.max_drift_rate,
                            user_validation=False,
                            csv_name=path_csvf,
                            saving=True)

    if df_check is None:
        print("\n*** plotSETI: No events produced in find_event_pipeline()!")
        return RETURN_ERROR

    # Make the plots for all of the HDF5/DAT file pairs in batch mode.
    matplotlib.use("agg", force=True)
    plot_event_pipeline(path_csvf,
                        path_h5_list,
                        plot_dir=out_dir,
                        filter_spec=args.filter_threshold,
                        offset=offset,
                        user_validation=False)

    print(f"\nplotSETI: Plots are stored in directory {out_dir}.")

    return RETURN_NORMAL
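
count_text_lines() is another small helper this excerpt assumes; a one-function sketch:

def count_text_lines(path):
    """Hypothetical sketch: return the number of lines in a text file."""
    with open(path) as fh:
        return sum(1 for _ in fh)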
Example #10
import glob

from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline

path = '/home/charlesg/turboSETI/trappist1/fil_files/high/'

filelist = glob.glob(path + '*.fil')

with open('fil_files.lst', 'w') as f:
    for item in filelist:
        f.write("%s\n" % item)

threshold = 3
snr = 10
target = 'TRAPPIST1'

# plot_event_pipeline expects the CSV filename only, not the full path
# (see the note in Example #2); csv_path records where the file lives.
csv_path = '/home/charlesg/turboSETI/trappist1/'
csv_string = 'DIAG_' + target + '_f' + str(threshold) + '_snr' + str(snr) + '.csv'

plot_event_pipeline(csv_string,
                    'fil_files.lst',
                    user_validation=True)
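
This script leaves user_validation=True, so plot_event_pipeline pauses for interactive confirmation. For unattended runs the same call can be made headless, as Examples #5 and #9 do, by forcing a non-interactive matplotlib backend first; a sketch:

import matplotlib
matplotlib.use('agg', force=True)  # write plot files without needing a display

plot_event_pipeline(csv_string,
                    'fil_files.lst',
                    user_validation=False)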