def main():
    """Audit the local ``projects.csv`` file and pretty-print the result.

    Assembles a stream with the builder returned by
    ``brewery.create_builder()``: a CSV source, an audit node and a
    pretty-printer node, in that order, and then runs the stream.
    """
    source_path = "projects.csv"

    builder = brewery.create_builder()

    # Source: CSV file read with an explicit encoding.
    builder.csv_source(source_path, encoding="UTF8")

    # NOTE(review): distinct_threshold=None presumably disables the cap on
    # distinct-value tracking in the audit node -- confirm against the
    # brewery audit node documentation.
    builder.audit(distinct_threshold=None)

    # Target: print the audit output.
    builder.pretty_printer()

    builder.stream.run()
""" Data Brewery Example Aggregate a remote CSV file. """ import brewery main = brewery.create_builder() main.csv_source( "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv") main.node.fields = brewery.FieldList([ "category_code", "category", "subcategory_code", "subcategory", "line_item", "year", ["amount", "float"] ]) main.aggregate(keys=["year", "category"], measures=["amount"]) main.field_map(keep_fields=["year", "category", "amount_sum"]) main.pretty_printer() main.stream.run()
"""
Example: How to use a generator function as a streaming data source.
"""

import random

import brewery


# Create a generator function
def generator(count=10, low=0, high=100):
    """Yield ``count`` rows of the form ``[i, roll]``.

    Args:
        count: Number of rows to produce; ``i`` runs from 0 to ``count - 1``.
        low: Lower bound passed to ``random.randint`` (inclusive).
        high: Upper bound passed to ``random.randint`` (inclusive).

    Yields:
        A two-item list ``[i, roll]`` where ``roll`` is a random integer
        between ``low`` and ``high``.
    """
    for i in range(count):
        yield [i, random.randint(low, high)]


# Create stream builder (HOM-based)
main = brewery.create_builder()

# The field names are given in the same order as the values in each
# yielded row.
main.generator_function_source(generator,
                               fields=brewery.FieldList(["i", "roll"]))

# Configure node with this:
#
# main.node.kwargs = {"count":100, "high":10}

# Uncomment this:
#
# fork = main.fork()
# fork.csv_target("random.csv")

main.formatted_printer()
main.stream.run()
def test_basic(self):
    """Adding one CSV source yields a single node identified as ``csv_source``."""
    builder = brewery.create_builder()
    builder.csv_source("foo")

    # The stream should contain exactly the node that was just added.
    node_count = len(builder.stream.nodes)
    self.assertEqual(1, node_count)

    # The builder's current node should be that CSV source.
    current_identifier = builder.node.identifier()
    self.assertEqual("csv_source", current_identifier)