Beispiel #1
0
def test_tovw():
    """Check ``tovw`` output for dense and sparse input with ``convert_labels=True``.

    The raw labels ``[2, 0]`` are expected to come out as ``1`` and ``-1``,
    each followed by the sample weight and ``index:value`` feature pairs.
    The zero-valued feature of the second row is absent from the expected line.
    """
    features = np.array([[1.2, 3.4, 5.6, 1.0, 10], [7.8, 9.10, 11, 0, 20]])
    labels = np.array([2, 0])
    weights = [1, 2]

    expected = [
        '1 1 | 0:1.2 1:3.4 2:5.6 3:1 4:10',
        '-1 2 | 0:7.8 1:9.1 2:11 4:20',
    ]

    # Dense ndarray input.
    dense_lines = tovw(x=features, y=labels, sample_weight=weights,
                       convert_labels=True)
    assert dense_lines == expected

    # The same data as a CSR sparse matrix must render identically.
    sparse_lines = tovw(x=csr_matrix(features), y=labels,
                        sample_weight=weights, convert_labels=True)
    assert sparse_lines == expected
def test_tovw():
    """Check ``tovw`` output for dense and sparse input with labels already in {1, -1}.

    Each expected line is the label, the sample weight, then ``index:value``
    feature pairs; the zero-valued feature of the second row is absent from
    the expected line.
    """
    features = np.array([[1.2, 3.4, 5.6, 1.0, 10], [7.8, 9.10, 11, 0, 20]])
    call_kwargs = {'y': np.array([1, -1]), 'sample_weight': [1, 2]}

    expected = [
        '1 1 | 0:1.2 1:3.4 2:5.6 3:1 4:10',
        '-1 2 | 0:7.8 1:9.1 2:11 4:20',
    ]

    # Dense ndarray input.
    dense_lines = tovw(x=features, **call_kwargs)
    assert dense_lines == expected

    # The same data as a CSR sparse matrix must render identically.
    sparse_lines = tovw(x=csr_matrix(features), **call_kwargs)
    assert sparse_lines == expected
Beispiel #3
0
def save_to_vw(filepath: str, X: pd.DataFrame, y: pd.Series = None, chunk_size: int = 1000) -> None:
    """Write ``X`` (and optionally ``y``) to ``filepath``, one ``tovw`` row per line.

    The frame is converted in positional slices of at most ``chunk_size`` rows,
    so only one chunk of converted rows is held in memory at a time.

    Args:
        filepath: Destination path. An existing file is overwritten.
        X: Feature rows to convert.
        y: Optional labels, paired with ``X`` by position (``iloc``), not by
            index. When ``None``, ``tovw`` is called without labels.
        chunk_size: Maximum number of rows passed to ``tovw`` per call.
            Must be positive.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``y`` is given
            and its length differs from ``X``.
    """
    # Validate before opening: opening in write mode truncates the target, and
    # a negative step would make ``range`` empty and silently write nothing.
    if chunk_size <= 0:
        raise ValueError(
            "chunk_size must be a positive integer, got {!r}".format(chunk_size)
        )
    # Rows and labels are sliced positionally; a length mismatch would
    # otherwise surface mid-file (or not at all), leaving partial output.
    if y is not None and len(y) != len(X):
        raise ValueError(
            "X and y must have the same length, got {} and {}".format(len(X), len(y))
        )

    # Plain "w" is enough (nothing is read back); explicit encoding keeps the
    # output independent of the platform locale.
    with open(filepath, "w", encoding="utf-8") as f:
        for pos in range(0, len(X), chunk_size):
            stop = pos + chunk_size
            chunk_X = X.iloc[pos:stop, :]
            chunk_y = None if y is None else y.iloc[pos:stop]
            # Batch the chunk's rows into one writelines call.
            f.writelines(row + "\n" for row in tovw(chunk_X, chunk_y))