def test_tovw():
    """Check ``tovw`` with ``convert_labels=True`` on dense and sparse input.

    The raw labels ``2`` and ``0`` are expected to appear as ``1`` and ``-1``
    in the produced lines, and the zero-valued feature (row 2, column 3) is
    expected to be absent from the output.

    NOTE(review): ``test_tovw`` appears to be defined more than once in this
    file; if so, the later definition replaces the earlier one at import
    time -- confirm that every variant is actually collected.
    """
    features = np.array([
        [1.2, 3.4, 5.6, 1.0, 10],
        [7.8, 9.10, 11, 0, 20],
    ])
    labels = np.array([2, 0])
    weights = [1, 2]
    expected = [
        '1 1 | 0:1.2 1:3.4 2:5.6 3:1 4:10',
        '-1 2 | 0:7.8 1:9.1 2:11 4:20',
    ]

    # np.asarray hands back the very same dense array; csr_matrix builds the
    # sparse equivalent. Both representations must yield identical lines.
    for as_input in (np.asarray, csr_matrix):
        produced = tovw(
            x=as_input(features),
            y=labels,
            sample_weight=weights,
            convert_labels=True,
        )
        assert produced == expected
def test_tovw():
    """Check ``tovw`` output for dense and sparse input with -1/1 labels.

    Labels are passed through without conversion here, and the zero-valued
    feature (row 2, column 3) is expected to be absent from the output.

    NOTE(review): ``test_tovw`` appears to be defined more than once in this
    file; if so, the later definition replaces the earlier one at import
    time -- confirm that every variant is actually collected.
    """
    features = np.array([
        [1.2, 3.4, 5.6, 1.0, 10],
        [7.8, 9.10, 11, 0, 20],
    ])
    labels = np.array([1, -1])
    weights = [1, 2]
    expected = [
        '1 1 | 0:1.2 1:3.4 2:5.6 3:1 4:10',
        '-1 2 | 0:7.8 1:9.1 2:11 4:20',
    ]

    # np.asarray hands back the very same dense array; csr_matrix builds the
    # sparse equivalent. Both representations must yield identical lines.
    for as_input in (np.asarray, csr_matrix):
        produced = tovw(x=as_input(features), y=labels, sample_weight=weights)
        assert produced == expected
def save_to_vw(filepath: str, X: pd.DataFrame, y: "pd.Series | None" = None, chunk_size=1000):
    """Write ``X`` (and optionally ``y``) to ``filepath``, one converted row per line.

    The frame is processed in slices of ``chunk_size`` rows; each slice is
    passed to ``tovw`` and every string it yields is written followed by a
    newline. An existing file at ``filepath`` is overwritten.

    Args:
        filepath: Destination path of the output file.
        X: Feature frame; rows are sliced positionally with ``iloc``.
        y: Optional labels, sliced with the same positions as ``X``. When
            ``None``, ``tovw`` is called with ``None`` as its second argument.
        chunk_size: Number of rows converted per ``tovw`` call. Must be >= 1.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate before opening: opening in write mode truncates the target, and
    # a negative step would otherwise make range() empty and silently leave an
    # empty file behind.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    with open(filepath, "w") as out:
        for start in range(0, len(X), chunk_size):
            stop = start + chunk_size
            chunk_X = X.iloc[start:stop, :]
            chunk_y = None if y is None else y.iloc[start:stop]
            # One batched write per chunk instead of one write call per row.
            out.writelines(row + "\n" for row in tovw(chunk_X, chunk_y))