def unintersected_files(source_marker: str) -> FileList:
    """
    List files which do not intersect in UT with another file of the same day.

    Deduplicated files are visited in sorted name order and grouped by the
    ``year.day`` key built from the features of their base name. When the UT
    span of a file overlaps the span of the most recently kept file of the
    same day, both files are left out of the result.

    :param source_marker: identificator of a data source.
    :return list of files not intersecting in UT.
    """
    deduplicated_files_list = deduplicated_files(source_marker)
    _, _, _, features_extractor = resolve_data_source(source_marker)

    # Every entry is (filename, first UT, last UT, day key). The day key is
    # stored with the entry so that, once an overlapping pair has been
    # dropped, the next file of that day is never compared with a file that
    # belongs to a different day.
    unintersected: List[Any] = []
    for filename in sorted(deduplicated_files_list):
        data = parsed_data(source_marker, filename)
        features = features_extractor(basename(filename))
        key = '{year}.{day}'.format(**features)
        ut = data.get('ut', transposed=True)[0]
        start, end = ut[0], ut[-1]

        # Nothing to compare with: first file overall or first kept file of
        # this day.
        if not unintersected or unintersected[-1][3] != key:
            unintersected.append((filename, start, end, key))
            continue

        _, previous_start, previous_end, _ = unintersected[-1]
        # Two closed spans overlap when each one starts no later than the
        # other ends (assuming UT does not decrease within a file). Unlike a
        # check of the previous end points only, this also catches a file
        # lying entirely inside the previous one.
        if start <= previous_end and previous_start <= end:
            # Drop the previous file and do not keep the current one either.
            unintersected.pop()
        else:
            unintersected.append((filename, start, end, key))

    return [entry[0] for entry in unintersected]
Ejemplo n.º 2
0
def test_source_nacs_selector() -> None:
    """Check that the NACS selector accepts NACS file names and rejects WATS ones."""
    _, _, nacs_selector, _ = resolve_data_source(DE2_SOURCE_NACS)

    # NACS names first, then WATS names, in the same order as the plain loops.
    expectations = (
        (NACS_TEST_FILES_ALL, True),
        (WATS_TEST_FILES_ALL, False),
    )
    for names, should_match in expectations:
        for name in names:
            assert bool(nacs_selector(name)) is should_match
Ejemplo n.º 3
0
def test_source_wats_features() -> None:
    """Check that the WATS extractor returns the year and day encoded in a file name."""
    _, _, _, wats_extractor = resolve_data_source(DE2_SOURCE_WATS)
    for name in WATS_TEST_FILES_ALL:
        # The expected values are the leading characters of the name:
        # four for the year followed by three for the day.
        expected_year, expected_day = name[:4], name[4:7]
        extracted = wats_extractor(name)
        assert expected_year == extracted['year']
        assert expected_day == extracted['day']
Ejemplo n.º 4
0
def all_files(source_marker: str) -> FileList:
    """
    Collect every file (good or bad) that belongs to a data source.

    The directory of the data source is listed and each entry accepted by the
    selector of the data source is returned joined with that directory.

    :param source_marker: identificator of a data source.
    :return list of all files in data source.
    """
    directory, _, is_source_file, _ = resolve_data_source(source_marker)
    matching = []
    for entry in listdir(directory):
        if is_source_file(entry):
            matching.append(join(directory, entry))
    return matching
def test_make_continuity_filter_empty() -> None:
    """Check that a continuity filter built without parameters accepts every probed index."""
    source_dir, _, _, _ = resolve_data_source(DE2_SOURCE_NACS)
    sample_file = join(source_dir, '1981295T001140_0_DE2_NACS_1S_V01.ASC')
    data = parsed_data(DE2_SOURCE_NACS, sample_file)
    is_continuous = make_continuity_filter(data, tuple())

    # Indexes are written as `number - 3`; presumably a line number of the
    # file minus its header lines -- TODO confirm.
    for line_number in range(3, 9):
        assert is_continuous(line_number - 3) is True
def test_make_continuity_filter_o_dens_he_dens() -> None:
    """Check the continuity filter for `o_dens` and `he_dens` on a run of neighbouring indexes."""
    source_dir, _, _, _ = resolve_data_source(DE2_SOURCE_NACS)
    sample_file = join(source_dir, '1981295T001140_0_DE2_NACS_1S_V01.ASC')
    data = parsed_data(DE2_SOURCE_NACS, sample_file)
    is_continuous = make_continuity_filter(data, ('o_dens', 'he_dens'))

    # (number, expected verdict); the filter is probed with `number - 3`,
    # presumably a line number of the file minus its header lines -- TODO
    # confirm.
    expectations = (
        (1698, False),
        (1699, True),
        (1700, False),
        (1701, False),
        (1702, True),
        (1703, False),
        (1704, False),
    )
    for line_number, expected in expectations:
        assert is_continuous(line_number - 3) is expected
def test_simple_nacs_parse() -> None:
    """Parse a NACS sample file and check the shape and the first two `ut`/`o_dens` rows."""
    source_dir, _, _, _ = resolve_data_source(DE2_SOURCE_NACS)
    sample_file = join(source_dir, '1981295T072140_0_DE2_NACS_1S_V01.ASC')
    table = FileParser(SourceNACSRow, sample_file).get('ut', 'o_dens')
    assert table.shape == (1546, 2)

    # Column 0 holds `ut`, column 1 holds `o_dens`; the `ut` column is
    # checked first, then the `o_dens` column.
    expected_ut = (26573128, 26574128)
    expected_o_dens = (7.940923E+07, 8.019976E+07)
    for row, value in enumerate(expected_ut):
        assert table[row][0] == value
    for row, value in enumerate(expected_o_dens):
        assert table[row][1] == value
def test_simple_wats() -> None:
    """Parse a WATS sample file and check the shape and the first two `ut`/`tn` rows."""
    source_dir, _, _, _ = resolve_data_source(DE2_SOURCE_WATS)
    sample_file = join(source_dir, '1982229_de2_wats_2s_v01.asc')
    table = FileParser(SourceWATSRow, sample_file).get('ut', 'tn')
    assert table.shape == (4826, 2)

    # Column 0 holds `ut`, column 1 holds `tn`; the `ut` column is checked
    # first, then the `tn` column.
    expected_ut = (9762724, 9764724)
    expected_tn = (822.8, 830.5)
    for row, value in enumerate(expected_ut):
        assert table[row][0] == value
    for row, value in enumerate(expected_tn):
        assert table[row][1] == value
Ejemplo n.º 9
0
def test_source_nacs_features() -> None:
    """Check that the NACS extractor returns the date and time fields encoded in a file name."""
    _, _, _, nacs_extractor = resolve_data_source(DE2_SOURCE_NACS)

    # Position of every feature inside a NACS file name; index 7 (between
    # the day and the hour) is skipped.
    feature_positions = {
        'year': slice(0, 4),
        'day': slice(4, 7),
        'hour': slice(8, 10),
        'minute': slice(10, 12),
        'second': slice(12, 14),
    }
    for name in NACS_TEST_FILES_ALL:
        extracted = nacs_extractor(name)
        for feature, position in feature_positions.items():
            assert name[position] == extracted[feature]
Ejemplo n.º 10
0
def list_days(source_marker: str,
              filtered: bool = False) -> List[Tuple[int, int]]:
    """
    List all days available for a data source.

    :param source_marker: identificator of a data source.
    :param filtered: when True the days are collected from `filtered_files`,
        otherwise from `all_files`.
    :return list of unique tuples, where first element is a year and second
        is a day; the order of the list is not defined.
    """
    from os.path import basename

    if filtered:
        files_list = filtered_files(source_marker)
    else:
        files_list = all_files(source_marker)
    _, _, _, features_extractor = resolve_data_source(source_marker)

    days_set = set()
    for file in files_list:
        # `all_files` returns names joined with the source directory, while
        # the extractor is fed bare file names everywhere else, so the
        # directory part is stripped before extracting features.
        features = features_extractor(basename(file))
        days_set.add((int(features['year'], 10), int(features['day'], 10)))

    return list(days_set)
Ejemplo n.º 11
0
def test_moving_average() -> None:
    """
    Compare `moving_average` of `o_dens` for a NACS sample file with the reference `o_dens_avg`.

    To produce new test data you may use the following spreadsheet formula (for column C2, where data is at B2:B1547):
    ```
    =AVERAGE(
        INDIRECT(
            CONCATENATE("R[-", MIN(COUNT(B$2:B2), COUNT(B2:B$1547), 351) - 1, "]C[-1]"), false
        ):INDIRECT(
            CONCATENATE("R[",MIN(COUNT(B$2:B2), COUNT(B2:B$1547), 351) - 1, "]C[-1]"), false
        )
    )
    ```
    """
    path, _, _, _ = resolve_data_source(DE2_SOURCE_NACS)
    parser = FileParser(SourceNACSRow,
                        join(path, '1981295T072140_0_DE2_NACS_1S_V01.ASC'))
    moving_average_result = moving_average(parser, 'o_dens', window_size=701)
    assert len(moving_average_result) == 1

    averaged = moving_average_result[0]
    assert len(averaged) == len(o_dens_avg)
    # The tolerance is applied to each element separately: `abs` does not
    # accept a list, so the differences cannot be collected first and passed
    # to `abs` as a whole.
    assert all(
        abs(actual - expected) < 10**-2
        for actual, expected in zip(averaged, o_dens_avg))
Ejemplo n.º 12
0
def parsed_data(source_marker: str, filename: str) -> FileParser:
    """
    Build a parser for a file of a data source.

    :param source_marker: identificator of a data source; it selects the
        parser class passed to `FileParser`.
    :param filename: name of the file handed to `FileParser` as is.
    :return `FileParser` created for the given file.
    """
    _, row_parser_class, _, _ = resolve_data_source(source_marker)
    return FileParser(row_parser_class, filename)