Example No. 1
0
def get_data(input_files):
    """Read in data from one or more files.

    In the case that there are multiple files, the data from all files will be
    combined into a single CI vs. FWHM dataset.

    Parameters
    ----------
    input_files : list
        list of CI vs. FWHM .csv and/or .json files to process. Files with any
        other extension are silently skipped.

    Returns
    -------
    data_table : astropy.table.Table
        table containing the stacked CI and FWHM values from all input files
    """
    data_table = Table()
    for filename in input_files:
        # The two branches are mutually exclusive, so use if/elif; any file
        # that is neither .csv nor .json is ignored.
        if filename.endswith(".csv"):
            data_table = vstack(
                [data_table,
                 Table.read(filename, format='ascii.csv')])
        elif filename.endswith(".json"):
            json_data = read_json_file(filename)
            data_table = vstack([data_table, json_data['data']['CI_FWHM']])
    return data_table
def get_json_files(search_path=None,
                   search_patterns=None,
                   log_level=logutil.logging.INFO):
    """Use glob to create a list of json files to harvest.

    This function looks for all the json files containing qa test results generated
    by `runastrodriz` and `runsinglehap`.  The search starts in the directory
    specified in the `search_path` parameter, but will look in immediate
    sub-directories as well if no json files are located in the directory
    specified by `search_path`.

    Parameters
    ----------
    search_path : str, optional
        directory path to search for .json files. Default value is the current working
        directory *at call time* (the default is resolved inside the function rather
        than in the signature, so it is not frozen to the import-time cwd).
        This serves as the starting directory for finding the .json files, with the
        search expanding to immediate sub-directories if no .json files are found
        in this directory.

    search_patterns : list of str, optional
        glob patterns used to locate the json files. Default value is
        ``["*_svm_*.json", "*_mvm_*.json", "*_cal_qa_*.json"]``.

    log_level : int, optional
        The desired level of verboseness in the log statements displayed on the screen and written to the
        .log file. Default value is 'INFO'.

    Returns
    -------
    out_json_dict : collections.OrderedDict
        dictionary containing lists of all identified json files, keyed by Pandas
        DataFrame index value

    Raises
    ------
    Exception
        If no .json files were found for any of the search patterns.
    """
    log.setLevel(log_level)
    # Resolve defaults here: a signature default of os.getcwd() would be
    # evaluated once at import time, and a default list would be a shared
    # mutable object across calls.
    if search_path is None:
        search_path = os.getcwd()
    if search_patterns is None:
        search_patterns = ["*_svm_*.json", "*_mvm_*.json", "*_cal_qa_*.json"]

    # set up search string and use glob to get list of files
    json_list = []
    for search_pattern in search_patterns:
        search_results = glob.glob(os.path.join(search_path, search_pattern))
        if not search_results:
            # Try another directory lower
            search_results = glob.glob(
                os.path.join(search_path, '*', search_pattern))

        log.info("{} files found: {}".format(search_pattern,
                                             len(search_results)))
        json_list += search_results

    # Fail gracefully if no .json files were found
    if not json_list:
        err_msg = "No .json files were found!"
        log.error(err_msg)
        raise Exception(err_msg)

    # store json filenames in a dictionary keyed by Pandas DataFrame index value
    out_json_dict = collections.OrderedDict()
    for json_filename in sorted(json_list):
        json_data = du.read_json_file(json_filename)
        dataframe_idx = json_data['general information']['dataframe_index']
        out_json_dict.setdefault(dataframe_idx, []).append(json_filename)
        del json_data  # Housekeeping!
    return out_json_dict
def make_dataframe_line(json_filename_list, log_level=logutil.logging.INFO):
    """Extract information from the json files specified by the input list *json_filename_list*.

    Parameters
    ----------
    json_filename_list : list
        list of json files to process

    log_level : int, optional
        The desired level of verboseness in the log statements displayed on the screen and written to the
        .log file. Default value is 'INFO'.

    Returns
    -------
    ingest_dict : collections.OrderedDict
        ordered dictionary containing all information extracted from json files specified by the input list
        *json_filename_list*, organized under the 'data', 'descriptions' and 'units' keys.
    """
    log.setLevel(log_level)
    header_ingested = False
    gen_info_ingested = False
    ingest_dict = collections.OrderedDict()
    ingest_dict['data'] = collections.OrderedDict()
    ingest_dict['descriptions'] = collections.OrderedDict()
    ingest_dict['units'] = collections.OrderedDict()
    # filter out ALL header keywords not included in 'header_keywords_to_keep'
    # (loop-invariant, so defined once up front)
    header_keywords_to_keep = [
        'APERTURE', 'CHINJECT', 'DATE-OBS', 'DEC_TARG', 'EXPTIME',
        'FGSLOCK', 'GYROMODE', 'MTFLAG', 'OBSKEY', 'OBSTYPE',
        'RA_TARG', 'SCAN_TYP', 'SUBARRAY', 'TIME-OBS'
    ]
    for json_filename in json_filename_list:
        short_json_filename = os.path.basename(json_filename)
        # This is to differentiate point catalog compare_sourcelists columns from segment catalog
        # compare_sourcelists columns in the dataframe
        if json_filename.endswith("_point-cat_svm_compare_sourcelists.json"):
            title_suffix = "hap_vs_hla_point_"
        elif json_filename.endswith(
                "_segment-cat_svm_compare_sourcelists.json"):
            title_suffix = "hap_vs_hla_segment_"
        else:
            title_suffix = ""
        json_data = du.read_json_file(json_filename)
        # add information from "header" section to ingest_dict just once
        if not header_ingested:
            for header_item in json_data['header'].keys():
                if header_item in header_keywords_to_keep:
                    ingest_dict["data"][
                        "header." +
                        header_item] = json_data['header'][header_item]
            header_ingested = True

        # add information from "general information" section to ingest_dict just once
        if not gen_info_ingested:
            for gi_item in json_data['general information'].keys():
                ingest_dict["data"][
                    "gen_info." +
                    gi_item] = json_data['general information'][gi_item]
            gen_info_ingested = True

        # recursively flatten nested "data" section dictionaries and build ingest_dict
        flattened_data = flatten_dict(json_data['data'])
        flattened_descriptions = flatten_dict(json_data['descriptions'])
        flattened_units = flatten_dict(json_data['units'])
        for fd_key, json_data_item in flattened_data.items():
            ingest_key = fd_key.replace(" ", "_")
            # isinstance() instead of comparing str(type(...)) against a
            # hard-coded class-repr string; Table is already imported at
            # module level (see get_data).
            if isinstance(json_data_item, Table):
                # spread each table column out into its own dataframe column
                for coltitle in json_data_item.colnames:
                    ingest_value = json_data_item[coltitle].tolist()
                    id_key = title_suffix + ingest_key + "." + coltitle
                    ingest_dict["data"][id_key] = [ingest_value]
                    _ingest_metadata(ingest_dict, id_key,
                                     fd_key + "." + coltitle,
                                     flattened_descriptions, flattened_units,
                                     short_json_filename)
            else:
                id_key = title_suffix + ingest_key
                # wrap lists so each dataframe cell holds the whole list
                if isinstance(json_data_item, list):
                    ingest_dict["data"][id_key] = [json_data_item]
                else:
                    ingest_dict["data"][id_key] = json_data_item
                _ingest_metadata(ingest_dict, id_key, fd_key,
                                 flattened_descriptions, flattened_units,
                                 short_json_filename)

    return ingest_dict


def _ingest_metadata(ingest_dict, id_key, lookup_key, flattened_descriptions,
                     flattened_units, short_json_filename):
    """Store the description and unit for *id_key* in *ingest_dict*.

    Looks up *lookup_key* in the flattened description and unit dictionaries;
    if either lookup fails (KeyError), a '>>>UNDEFINED<<<' placeholder is
    stored instead and a warning is logged.
    """
    try:
        ingest_dict["descriptions"][id_key] = flattened_descriptions[lookup_key]
        log.debug("Added Description {} = {}".format(
            id_key, flattened_descriptions[lookup_key]))
    except KeyError:  # insert placeholder if the description is missing
        log.warning(
            "Descriptions not found for {} in file {}. Using placeholder value '>>>UNDEFINED<<<' instead."
            .format(id_key, short_json_filename))
        ingest_dict["descriptions"][id_key] = ">>>UNDEFINED<<<"

    try:
        ingest_dict["units"][id_key] = flattened_units[lookup_key]
        log.debug("Added Unit {} = {}".format(
            id_key, flattened_units[lookup_key]))
    except KeyError:  # insert placeholder if the unit is missing
        log.warning(
            "Units not found for {} in file {}. Using placeholder value '>>>UNDEFINED<<<' instead."
            .format(id_key, short_json_filename))
        ingest_dict["units"][id_key] = ">>>UNDEFINED<<<"