def execute(file_name):
    """Convert the dump named *file_name* into a Castra store.

    The loaded records are split into batches of 200000, each batch is
    passed through ``to_df``, and the resulting frames are appended to a
    Castra located at ``./subreddit_dumps/<file_name>.castra``.
    """
    records = load(file_name)
    chunks = partition_all(200000, records)

    # peek hands back the first frame (used as the Castra template)
    # together with a sequence to iterate for the actual writes.
    # NOTE(review): presumably toolz.peek, so the sequence still includes
    # that first frame -- confirm against the file's imports.
    template, frame_seq = peek(map(to_df, chunks))

    target = './subreddit_dumps/' + file_name + '.castra'
    categorical_columns = ['distinguished', 'removal_reason']
    store = Castra(target, template=template, categories=categorical_columns)
    store.extend_sequence(frame_seq, freq='3h')
def execute(file_name):
    """Write the dump *file_name* to ``./subreddit_dumps/<file_name>.castra``.

    Steps: load the input, cut it into batches of 200000 items, turn every
    batch into a frame via ``to_df``, then feed the frames to a Castra whose
    template is the first frame.
    """
    source = load(file_name)
    batched = partition_all(200000, source)
    converted = map(to_df, batched)

    # The first frame doubles as the schema template for the Castra.
    # NOTE(review): assumes peek returns (first_item, full_sequence) as
    # toolz.peek does -- confirm against the file's imports.
    first_frame, all_frames = peek(converted)

    destination = './subreddit_dumps/' + file_name + '.castra'
    out = Castra(
        destination,
        template=first_frame,
        categories=['distinguished', 'removal_reason'],
    )
    out.extend_sequence(all_frames, freq='3h')
def test_readonly():
    """A Castra opened with ``readonly=True`` rejects every mutating call.

    A writable Castra is created and populated first; a second, read-only
    handle on the same path must raise ``IOError`` for each write-style
    operation while still exposing the same columns, partitions and minimum.
    """
    path = tempfile.mkdtemp(prefix='castra-')
    try:
        writable = Castra(path=path, template=A)
        writable.extend(A)

        frozen = Castra(path=path, readonly=True)

        # Each entry defers one mutating call so that the attribute lookup
        # and the call both happen inside the pytest.raises context.
        forbidden_calls = (
            lambda: frozen.extend(B),
            lambda: frozen.extend_sequence([B]),
            lambda: frozen.flush(),
            lambda: frozen.drop(),
            lambda: frozen.save_partitions(),
            lambda: frozen.flush_meta(),
        )
        for attempt in forbidden_calls:
            with pytest.raises(IOError):
                attempt()

        # The read-only handle must mirror the writable one's metadata.
        assert writable.columns == frozen.columns
        assert (writable.partitions == frozen.partitions).all()
        assert writable.minimum == frozen.minimum
    finally:
        shutil.rmtree(path)