예제 #1
0
    def setUp(self):
        """Build the branching stream fixture used by the tests.

        Topology of the stream::

            source ---+---> aggregate ----> aggtarget
                      |
                      +---> sample ----> map ----> target

        The source rows, field list and both target lists are kept on the
        test instance; the target nodes are constructed with those same
        list objects.
        """
        self.fields = ds.fieldlist(["a", "b", "c", "str"])
        self.src_list = [
            [1, 2, 3, "a"],
            [4, 5, 6, "b"],
            [7, 8, 9, "a"],
        ]
        self.aggtarget_list = []
        self.target_list = []

        # Node name -> node instance, in the same construction order as before.
        node_map = dict(
            source=RowListSourceNode(self.src_list, self.fields),
            target=RecordListTargetNode(self.target_list),
            aggtarget=RecordListTargetNode(self.aggtarget_list),
            sample=SampleNode("sample"),
            map=FieldMapNode(drop_fields=["c"]),
            aggregate=AggregateNode(keys=["str"]),
        )

        # Each branch is listed end to end; consecutive names form one
        # (origin, destination) connection.
        branches = (
            ("source", "sample", "map", "target"),
            ("source", "aggregate", "aggtarget"),
        )
        wiring = {
            link
            for branch in branches
            for link in zip(branch, branch[1:])
        }

        self.stream = Stream(node_map, wiring)
예제 #2
0
import sys
import brewery.ds as ds
import brewery.dq as dq
from chardet.universaldetector import UniversalDetector

# Path of the CSV file to convert, taken from the first command-line argument.
filename = sys.argv[1]

# Feed the raw bytes to chardet line by line and stop as soon as the detector
# reports it is done. `open` inside a `with` block replaces the Python-2-only
# `file()` builtin (a NameError on Python 3) and guarantees the handle is
# closed even when the loop exits early.
detector = UniversalDetector()
with open(filename, 'rb') as raw_input:
    for line in raw_input:
        detector.feed(line)
        if detector.done:
            break
detector.close()
detected_encoding = detector.result["encoding"]

# First attempt: read the file as comma-separated.
src = ds.CSVDataSource(filename, read_header=True,
                       encoding=detected_encoding, delimiter=',')
src.initialize()
if len(src.field_names) == 1:
    # Only one field came back, so the comma was presumably not the real
    # delimiter; reopen the source using a semicolon instead.
    src.finalize()
    src = ds.CSVDataSource(filename, read_header=True,
                           encoding=detected_encoding, delimiter=';')
    src.initialize()

# Copy every record to standard output as UTF-8 CSV with the same fields.
out = ds.CSVDataTarget(sys.stdout, encoding='utf-8')
out.fields = ds.fieldlist(src.field_names)
out.initialize()
for record in src.records():
    out.append(record)
src.finalize()
out.finalize()
예제 #3
0
 def output_fields(self):
     """Return a field list built from the single field name ``"i"``."""
     field_names = ["i"]
     return ds.fieldlist(field_names)
예제 #4
0
import brewery.dq as dq
from chardet.universaldetector import UniversalDetector

# Path of the CSV file to convert, taken from the first command-line argument.
filename = sys.argv[1]

# Detect the file's character encoding from its raw bytes. The file is opened
# with `open` in a `with` block: the original `file()` builtin exists only on
# Python 2, and the context manager closes the handle even on early `break`.
detector = UniversalDetector()
with open(filename, 'rb') as raw_input:
    for line in raw_input:
        detector.feed(line)
        if detector.done:
            break
detector.close()
detected_encoding = detector.result["encoding"]

# First attempt: read the file as comma-separated.
src = ds.CSVDataSource(filename,
                       read_header=True,
                       encoding=detected_encoding,
                       delimiter=',')
src.initialize()
if len(src.field_names) == 1:
    # Only one field came back, so the comma was presumably not the real
    # delimiter; reopen the source using a semicolon instead.
    src.finalize()
    src = ds.CSVDataSource(filename,
                           read_header=True,
                           encoding=detected_encoding,
                           delimiter=';')
    src.initialize()

# Copy every record to standard output as UTF-8 CSV with the same fields.
out = ds.CSVDataTarget(sys.stdout, encoding='utf-8')
out.fields = ds.fieldlist(src.field_names)
out.initialize()
for record in src.records():
    out.append(record)
src.finalize()
out.finalize()