def execute(file_name):
    """Convert the dump named `file_name` into a Castra store.

    The loaded input is split into batches of 200000 items, each batch is
    turned into a frame with `to_df`, and the frames are appended to a
    Castra created at ``./subreddit_dumps/<file_name>.castra``.
    """
    records = load(file_name)
    chunks = partition_all(200000, records)

    # peek hands back one sample frame (used as the Castra template) together
    # with the frame sequence that is subsequently written to the store.
    sample, frame_seq = peek(map(to_df, chunks))

    target = './subreddit_dumps/' + file_name + '.castra'
    store = Castra(target,
                   template=sample,
                   categories=['distinguished', 'removal_reason'])
    store.extend_sequence(frame_seq, freq='3h')
Beispiel #2
0
def execute(file_name):
    """Write the contents of `file_name` into a Castra under ./subreddit_dumps/.

    Input is read with `load`, grouped into batches of 200000, converted to
    frames by `to_df`, and appended with `extend_sequence(..., freq='3h')`.
    """
    categorical_columns = ['distinguished', 'removal_reason']
    frame_iter = map(to_df, partition_all(200000, load(file_name)))

    # The first value from peek serves as the template describing the store;
    # the second is the sequence of frames handed to extend_sequence.
    template_frame, all_frames = peek(frame_iter)

    store_path = './subreddit_dumps/' + file_name + '.castra'
    store = Castra(store_path,
                   template=template_frame,
                   categories=categorical_columns)
    store.extend_sequence(all_frames, freq='3h')
Beispiel #3
0
def test_readonly():
    """A Castra opened with readonly=True must refuse every mutating call.

    A writable store is created and populated in a temporary directory, then
    reopened read-only. Each mutating method is expected to raise IOError,
    while the read-only handle still reports the same columns, partitions
    and minimum as the writable one.
    """
    workdir = tempfile.mkdtemp(prefix='castra-')
    try:
        writer = Castra(path=workdir, template=A)
        writer.extend(A)
        reader = Castra(path=workdir, readonly=True)

        # Thunks keep the attribute lookup and the call inside pytest.raises,
        # and are executed in the same order as the individual checks.
        forbidden = (
            lambda: reader.extend(B),
            lambda: reader.extend_sequence([B]),
            lambda: reader.flush(),
            lambda: reader.drop(),
            lambda: reader.save_partitions(),
            lambda: reader.flush_meta(),
        )
        for attempt in forbidden:
            with pytest.raises(IOError):
                attempt()

        assert writer.columns == reader.columns
        assert (writer.partitions == reader.partitions).all()
        assert writer.minimum == reader.minimum
    finally:
        # Always remove the temporary store, even when an assertion fails.
        shutil.rmtree(workdir)
Beispiel #4
0
def test_readonly():
    """Check that a read-only Castra rejects writes but mirrors the metadata.

    The store is built in a fresh temporary directory, reopened with
    readonly=True, and every mutating method is expected to raise IOError.
    Columns, partitions and minimum must match the writable handle.
    """
    store_dir = tempfile.mkdtemp(prefix='castra-')
    try:
        writable = Castra(path=store_dir, template=A)
        writable.extend(A)
        frozen = Castra(path=store_dir, readonly=True)

        # (method name, positional arguments) for each call that must fail.
        rejected_calls = (
            ('extend', (B,)),
            ('extend_sequence', ([B],)),
            ('flush', ()),
            ('drop', ()),
            ('save_partitions', ()),
            ('flush_meta', ()),
        )
        for method_name, call_args in rejected_calls:
            with pytest.raises(IOError):
                getattr(frozen, method_name)(*call_args)

        assert writable.columns == frozen.columns
        assert (writable.partitions == frozen.partitions).all()
        assert writable.minimum == frozen.minimum
    finally:
        # Clean up the on-disk store regardless of the test outcome.
        shutil.rmtree(store_dir)