Example #1
0
def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Generate all data in the interval for the given stream key using data
    from the SAN.
    :param stream_key: stream identifier; stream_key.stream_name selects which files to read
    :param time_range: time interval to fetch data for
    :param location_metadata: optional precomputed SAN location metadata;
        looked up via get_san_location_metadata when None
    :return: datasets concatenated along the 'index' dimension, or None when the
        reference designator directory is missing or no data was found
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    # get which bins we can gather data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    data = []
    next_index = 0
    for time_bin in location_metadata.bin_list:
        direct = dir_string.format(time_bin)
        if not os.path.exists(direct):
            continue
        # gather data from every deployment directory within this bin
        for deployment in os.listdir(direct):
            full_path = os.path.join(direct, deployment)
            if not os.path.isdir(full_path):
                continue
            # -1 requests every particle in range (no subsampling)
            new_data = get_deployment_data(full_path, stream_key.stream_name, -1, time_range,
                                           index_start=next_index)
            if new_data is not None:
                data.append(new_data)
                # Keep track of indexes so they are unique in the final dataset
                next_index += len(new_data['index'])
    if not data:
        return None
    return xray.concat(data, dim='index')
Example #2
0
def fetch_nsan_data(stream_key,
                    time_range,
                    num_points=1000,
                    location_metadata=None):
    """
    Generate evenly spaced samples over the interval for the given stream key
    using data from the SAN.
    :param stream_key: stream identifier; stream_key.stream_name selects which files to read
    :param num_points: target total number of points to sample (default 1000)
    :param time_range: time interval to sample data from
    :param location_metadata: optional precomputed SAN location metadata;
        looked up via get_san_location_metadata when None
    :return: compiled dataset of all sampled data, or None when the reference
        designator directory is missing
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded SAN")
        return None
    to_sample = get_SAN_samples(num_points, location_metadata)
    # Grab a random subset of particles from each matching file that fall
    # within the time range.
    missed = 0
    data = []
    next_index = 0
    # Pair every future with the number of points it was asked to produce so
    # the "missed" accounting below uses each future's own quota.  (Relying on
    # the bare loop variable would use only its last, stale value.)
    futures = []
    for time_bin, num_data_points in to_sample:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    future = san_threadpool.apply_async(
                        get_deployment_data,
                        (full_path, stream_key.stream_name,
                         num_data_points, time_range),
                        kwds={'index_start': next_index})
                    futures.append((future, num_data_points))
        else:
            missed += num_data_points

    for future, requested in futures:
        new_data = future.get()
        if new_data is None:
            missed += requested
            continue
        count = len(new_data['index'])
        missed += (requested - count)
        data.append(new_data)
        # keep track of the indexes so that the final dataset has unique indices
        # NOTE(review): next_index only advances here, after every future was
        # already submitted with the initial index_start — confirm that
        # compile_datasets re-indexes, otherwise indices may collide.
        next_index += count

    # logging.Logger.warn is a deprecated alias; use warning()
    log.warning(
        "SAN: Failed to produce {:d} points due to nature of sampling".format(
            missed))
    return compile_datasets(data)
Example #3
0
def fetch_nsan_data(stream_key, time_range, num_points=1000, location_metadata=None):
    """
    Generate evenly spaced samples over the interval for the given stream key
    using data from the SAN.
    :param stream_key: stream identifier; stream_key.stream_name selects which files to read
    :param time_range: time interval to sample data from
    :param num_points: target total number of points to sample (default 1000)
    :param location_metadata: optional precomputed SAN location metadata;
        looked up via get_san_location_metadata when None
    :return: compiled dataset of all sampled data, or None when the reference
        designator directory is missing
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded SAN")
        return None
    to_sample = get_SAN_samples(num_points, location_metadata)
    # Grab a random subset of particles from each matching file that fall
    # within the time range.
    missed = 0
    data = []
    next_index = 0
    # Pair every future with the number of points it was asked to produce so
    # the "missed" accounting below uses each future's own quota.  (Relying on
    # the bare loop variable would use only its last, stale value.)
    futures = []
    for time_bin, num_data_points in to_sample:
        direct = dir_string.format(time_bin)
        if os.path.exists(direct):
            # get data from all of the deployments
            for deployment in os.listdir(direct):
                full_path = os.path.join(direct, deployment)
                if os.path.isdir(full_path):
                    future = SessionManager.pool().apply_async(
                        get_deployment_data,
                        (full_path, stream_key.stream_name, num_data_points, time_range),
                        kwds={'index_start': next_index})
                    futures.append((future, num_data_points))
        else:
            missed += num_data_points

    for future, requested in futures:
        new_data = future.get()
        if new_data is None:
            missed += requested
            continue
        count = len(new_data['index'])
        missed += (requested - count)
        data.append(new_data)
        # keep track of the indexes so that the final dataset has unique indices
        # NOTE(review): next_index only advances here, after every future was
        # already submitted with the initial index_start — confirm that
        # compile_datasets re-indexes, otherwise indices may collide.
        next_index += count

    # logging.Logger.warn is a deprecated alias; use warning()
    log.warning("SAN: Failed to produce {:d} points due to nature of sampling".format(missed))
    return compile_datasets(data)
Example #4
0
def fetch_full_san_data(stream_key, time_range, location_metadata=None):
    """
    Collect every particle in the interval for the given stream key from the
    offloaded SAN.
    :param stream_key: stream identifier; stream_key.stream_name selects which files to read
    :param time_range: time interval to fetch data for
    :param location_metadata: optional precomputed SAN location metadata;
        looked up via get_san_location_metadata when None
    :return: datasets concatenated along the 'index' dimension, or None when
        the reference designator directory is missing or nothing was found
    """
    if location_metadata is None:
        location_metadata = get_san_location_metadata(stream_key, time_range)
    # determine which bins are available to pull data from
    ref_des_dir, dir_string = get_SAN_directories(stream_key, split=True)
    if not os.path.exists(ref_des_dir):
        log.warning("Reference Designator does not exist in offloaded DataSAN")
        return None
    datasets = []
    offset = 0
    for time_bin in location_metadata.bin_list:
        bin_dir = dir_string.format(time_bin)
        if not os.path.exists(bin_dir):
            continue
        # walk every deployment directory inside this bin
        for deployment in os.listdir(bin_dir):
            deployment_dir = os.path.join(bin_dir, deployment)
            if not os.path.isdir(deployment_dir):
                continue
            chunk = get_deployment_data(deployment_dir,
                                        stream_key.stream_name,
                                        -1,
                                        time_range,
                                        index_start=offset)
            if chunk is None:
                continue
            datasets.append(chunk)
            # advance the offset so indices stay unique in the final dataset
            offset += len(chunk['index'])
    if not datasets:
        return None
    return xr.concat(datasets, dim='index')