コード例 #1
0
def test_by_time_period_from_7670_to_8400():
    """Compare ``filter_by_time_period`` for 7670-8400 with a fixed table.

    The expected table has ``unit_id``, ``value``, ``start_epoch_days`` and
    ``stop_epoch_days`` columns; the last three ``stop_epoch_days`` entries
    are ``np.nan``.
    """
    expected_columns = {
        'unit_id': [
            1000000002, 1000000004, 1000000003, 1000000001,
            1000000001, 1000000003, 1000000003, 1000000001,
            1000000002,
        ],
        'value': ["8", "2", "12", "3", "16", "2", "12", "3", "8"],
        'start_epoch_days': [
            1461, 3287, 4018, 5479, 7851, 7701, 7957, 8126, 8066,
        ],
        'stop_epoch_days': [
            8065, 7710, 7700, 7850, 8125, 7956, np.nan, np.nan, np.nan,
        ],
    }
    expected_table = Table.from_pydict(expected_columns)
    print_expected(expected_table)

    # NOTE(review): the trailing (None, True) arguments are presumably
    # "no population filter" and "include attributes" -- confirm against
    # the signature of filter_by_time_period.
    result_table = filter_by_time_period(
        TEST_BOSTED_PARQUET_DIR, 7670, 8400, None, True
    )
    print_actual(result_table)

    expected_frame = expected_table.to_pandas()
    result_frame = result_table.to_pandas()
    # check_dtype=False: values must match, column dtypes need not.
    assert_frame_equal(expected_frame, result_frame, check_dtype=False)
コード例 #2
0
ファイル: network_sender.py プロジェクト: sadeemsaleh/texera
    def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> None:
        """
        Forward a data payload to the target actor through the proxy client.
        Intended for internal use only.

        :param to: The target actor's ActorVirtualIdentity, used as the header tag
        :param data_payload: Either an OutputDataFrame (sent as a Table) or an
            EndOfUpstream marker (sent with no table)
        :raises TypeError: if data_payload is neither of the two supported types
        """

        if isinstance(data_payload, OutputDataFrame):
            # converting from a column-based dictionary is the fastest known method
            # https://stackoverflow.com/questions/57939092/fastest-way-to-construct-pyarrow-table-row-by-row
            columns = {}
            for name in data_payload.schema.names:
                # One pass over the frame per field, collecting that field's values.
                columns[name] = [row[name] for row in data_payload.frame]
            table = Table.from_pydict(columns, schema=data_payload.schema)
            header = PythonDataHeader(tag=to, is_end=False)
            self._proxy_client.send_data(bytes(header), table)
            return

        if isinstance(data_payload, EndOfUpstream):
            # End-of-stream is signalled by is_end=True and no table.
            header = PythonDataHeader(tag=to, is_end=True)
            self._proxy_client.send_data(bytes(header), None)
            return

        raise TypeError(f"Unexpected payload {data_payload}")
コード例 #3
0
def test_by_time_excluding_attributes():
    """Compare ``filter_by_time`` at 7669 with a two-column expected table.

    The expected table only has ``unit_id`` and ``value`` columns.
    """
    expected_columns = {
        'unit_id': [1000000002, 1000000004, 1000000003, 1000000001],
        'value': ["8", "2", "12", "3"],
    }
    expected_table = Table.from_pydict(expected_columns)
    print_expected(expected_table)

    result_table = filter_by_time(TEST_BOSTED_PARQUET_DIR, 7669)
    print_actual(result_table)

    expected_frame = expected_table.to_pandas()
    result_frame = result_table.to_pandas()
    # check_dtype=False: values must match, column dtypes need not.
    assert_frame_equal(expected_frame, result_frame, check_dtype=False)
コード例 #4
0
def test_by_time():
    """Compare ``filter_by_time`` at 7669 with a four-column expected table.

    The expected table has ``unit_id``, ``value``, ``start_epoch_days`` and
    ``stop_epoch_days`` columns.
    """
    expected_columns = {
        'unit_id': [1000000002, 1000000004, 1000000003, 1000000001],
        'value': ["8", "2", "12", "3"],
        'start_epoch_days': [1461, 3287, 4018, 5479],
        'stop_epoch_days': [8065, 7710, 7700, 7850],
    }
    expected_table = Table.from_pydict(expected_columns)
    print_expected(expected_table)

    # NOTE(review): the trailing (None, True) arguments are presumably
    # "no population filter" and "include attributes" -- confirm against
    # the signature of filter_by_time.
    result_table = filter_by_time(TEST_BOSTED_PARQUET_DIR, 7669, None, True)
    print_actual(result_table)

    expected_frame = expected_table.to_pandas()
    result_frame = result_table.to_pandas()
    # check_dtype=False: values must match, column dtypes need not.
    assert_frame_equal(expected_frame, result_frame, check_dtype=False)
コード例 #5
0
def test_by_fixed_including_population_filter_on_single_parquet():
    """Compare ``filter_by_fixed`` on one parquet file with a fixed table.

    The same two unit ids are used as the population filter and as the
    expected ``unit_id`` column.
    """
    population_filter = [11111113785911, 11111111190644]
    parquet_file = TEST_PERSON_INCOME_PARQUET_FILE

    expected_columns = {
        'unit_id': [11111113785911, 11111111190644],
        'value': ["16354872", "11331198"],
        'start_epoch_days': [13879, 6209],
        'stop_epoch_days': [14244, 6573],
    }
    expected_table = Table.from_pydict(expected_columns)
    print_expected(expected_table)

    result_table = filter_by_fixed(parquet_file, population_filter, True)
    # Printing is skipped for a falsy result; the comparison below runs
    # regardless.
    if result_table:
        print_actual(result_table)

    expected_frame = expected_table.to_pandas()
    result_frame = result_table.to_pandas()
    # check_dtype=False: values must match, column dtypes need not.
    assert_frame_equal(expected_frame, result_frame, check_dtype=False)
コード例 #6
0
def test_by_fixed_including_attributes_and_population_filter():
    """Compare ``filter_by_fixed`` with a two-unit filter against a fixed table.

    Every expected ``unit_id`` is one of the two ids in the population
    filter; the last two ``stop_epoch_days`` entries are ``np.nan``.
    """
    population_filter = [1000000002, 1000000003]

    expected_columns = {
        'unit_id': [
            1000000002, 1000000003, 1000000003, 1000000003, 1000000002,
        ],
        'value': ["8", "12", "2", "12", "8"],
        'start_epoch_days': [1461, 4018, 7701, 7957, 8066],
        'stop_epoch_days': [8065, 7700, 7956, np.nan, np.nan],
    }
    expected_table = Table.from_pydict(expected_columns)
    print_expected(expected_table)

    result_table = filter_by_fixed(
        TEST_BOSTED_PARQUET_DIR, population_filter, True
    )
    print_actual(result_table)

    expected_frame = expected_table.to_pandas()
    result_frame = result_table.to_pandas()
    # check_dtype=False: values must match, column dtypes need not.
    assert_frame_equal(expected_frame, result_frame, check_dtype=False)
コード例 #7
0
# Demo script: build a Table three different ways and print each result.
# `ctx` is not defined in this excerpt; it is passed as the first argument
# to every Table constructor below.

# 1) From a dict mapping column names to equal-length lists of values.
print("Initialize From a dictionary")
data_dictionary = {
    'col-1': [1, 2, 3, 4],
    'col-2': [5, 6, 7, 8],
    'col-3': [9, 10, 11, 12]
}
tb: Table = Table.from_pydict(ctx, data_dictionary)
print(tb)

# 2) From an ATable built from the same dict and then converted with
#    Table.from_arrow. NOTE(review): ATable is presumably the PyArrow Table
#    class, going by the printed label -- confirm against the imports.
print("Initialize From a PyArrow Table")
data_dictionary = {
    'col-1': [1, 2, 3, 4],
    'col-2': [5, 6, 7, 8],
    'col-3': [9, 10, 11, 12]
}
atb: ATable = ATable.from_pydict(data_dictionary)
tb: Table = Table.from_arrow(ctx, atb)
print(atb)
print(tb)

# 3) From a list of NumPy arrays plus a parallel list of column names.
print("Initialize From Numpy")
nd_array_list = [
    np.array([1, 2, 3, 4]),
    np.array([5, 6, 7, 8]),
    np.array([9, 10, 11, 12])
]
columns = ['c1', 'c2', 'c3']
tb: Table = Table.from_numpy(ctx, columns, nd_array_list)
print(tb)

# The code for this section lies beyond the visible excerpt.
print("Initialize From Pandas")