def test_gz(self):
    """Round-trip ``self.s`` through the file ``test.csv.gz``.

    The CSV in ``self.s`` is pulled from an in-memory stream, pushed to
    ``test.csv.gz``, pulled back from that file and rendered as CSV again;
    the result must equal the original text.
    """
    source = Babe().pull(stream=StringIO(self.s), format='csv', name='Test')
    source.push(filename='test.csv.gz')
    reloaded = Babe().pull(filename='test.csv.gz')
    out = StringIO()
    reloaded.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.s)
def test_join_none(self):
    """Join ``self.s1`` with ``self.s2_bis`` using ``Babe.ON_ERROR_NONE``.

    Rows are matched on ``country`` against ``country_code`` of the joined
    stream; the string rendering must equal ``self.sjoined_bis``.
    """
    left = Babe().pull(string=self.s1, format='csv')
    right = Babe().pull(string=self.s2_bis, format='csv')
    joined = left.join(
        join_stream=right,
        key='country',
        join_key='country_code',
        on_error=Babe.ON_ERROR_NONE,
    )
    self.assertEqual(joined.to_string(), self.sjoined_bis)
def test_zip(self):
    """Round-trip ``self.s`` through the archive ``tests/test.zip``.

    The CSV is pushed to ``tests/test.zip``, pulled back from it and pushed
    to an in-memory buffer (no explicit ``format`` on that last push); the
    buffer content must equal ``self.s``.
    """
    source = Babe().pull(stream=StringIO(self.s), format="csv")
    source.push(filename='tests/test.zip')
    reloaded = Babe().pull(filename='tests/test.zip')
    out = StringIO()
    reloaded.push(stream=out)
    self.assertEqual(out.getvalue(), self.s)
def test_join(self):
    """Join ``self.s1`` with ``self.s2`` and compare against ``self.sjoined``.

    Rows are matched on ``country`` against ``country_code`` of the joined
    stream, and the result is rendered as CSV into an in-memory buffer.
    """
    left = Babe().pull(stream=StringIO(self.s1), format='csv')
    right = Babe().pull(stream=StringIO(self.s2), format='csv')
    joined = left.join(join_stream=right, key='country', join_key='country_code')
    out = StringIO()
    joined.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.sjoined)
def test_s3(self):
    """Round-trip a small CSV through ``test3.csv`` using ``protocol="s3"``.

    The data is pushed to and pulled back from the bucket ``florian-test``;
    the pulled content must render to the same CSV string.
    """
    s = "a,b\n1,2\n3,4\n"
    source = Babe().pull(string=s, format='csv', name='Test')
    source.push(filename='test3.csv', bucket='florian-test', protocol="s3")
    reloaded = Babe().pull(filename='test3.csv', name='Test',
                           bucket='florian-test', protocol="s3")
    self.assertEqual(reloaded.to_string(), s)
def test_join_none(self):
    """Join ``self.s1`` with ``self.s2_bis`` using ``Babe.ON_ERROR_NONE``.

    Both inputs are read from in-memory streams, rows are matched on
    ``country`` against ``country_code``, and the CSV written to the output
    buffer must equal ``self.sjoined_bis``.
    """
    left = Babe().pull(stream=StringIO(self.s1), format='csv')
    right = Babe().pull(stream=StringIO(self.s2_bis), format='csv')
    joined = left.join(
        join_stream=right,
        key='country',
        join_key='country_code',
        on_error=Babe.ON_ERROR_NONE,
    )
    out = StringIO()
    joined.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.sjoined_bis)
def test_s3(self):
    """Round-trip a small CSV through ``test3.csv`` using ``protocol="s3"``.

    The data is read from an in-memory stream, pushed to and pulled back
    from the bucket ``florian-test``, then written as CSV to a buffer whose
    content must equal the original text.
    """
    s = "a,b\n1,2\n3,4\n"
    source_buf = StringIO(s)
    source = Babe().pull(stream=source_buf, format='csv', name='Test')
    source.push(filename='test3.csv', bucket='florian-test', protocol="s3")
    reloaded = Babe().pull(filename='test3.csv', name='Test',
                           bucket='florian-test', protocol="s3")
    out = StringIO()
    reloaded.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), s)
def test_vectorwise(self):
    """Push ``self.s`` into a ``vectorwise`` SQL table and read it back.

    After type detection the rows are written to ``test_table`` in the
    ``pybabe_test`` database (``drop_table=True``, ``create_table=True``);
    pulling the table back must render to the same CSV as ``self.s``.
    """
    source = Babe().pull(string=self.s, format='csv')
    source = source.typedetect()
    source.push_sql(table='test_table', database_kind='vectorwise',
                    database='pybabe_test', drop_table=True,
                    create_table=True)
    reloaded = Babe().pull_sql(database_kind='vectorwise',
                               database='pybabe_test', table='test_table')
    self.assertEqual(reloaded.to_string(), self.s)
def test_pushpull(self):
    """Push ``self.s2`` through ``push_mongo`` and read it back unchanged.

    The CSV is loaded with ``rown`` as primary key, type-detected, and
    written to the ``test_pushpull`` collection of ``pybabe_test`` with
    ``drop_collection=True``. It is then pulled back restricted to the
    fields ``rown``, ``f`` and ``s`` and must render to the same CSV.
    """
    source = Babe().pull(stream=StringIO(self.s2), format='csv',
                         primary_key='rown')
    source = source.typedetect()
    source.push_mongo(db='pybabe_test', collection='test_pushpull',
                      drop_collection=True)
    reloaded = Babe().pull_mongo(db="pybabe_test", fields=['rown', 'f', 's'],
                                 collection='test_pushpull')
    out = StringIO()
    reloaded.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.s2)
def test_vectorwise(self):
    """Push ``self.s`` into a ``vectorwise`` SQL table and read it back.

    After type detection the rows are written to ``test_table`` in the
    ``pybabe_test`` database (``drop_table=True``, ``create_table=True``).
    The table is pulled back and written as comma-delimited CSV to a buffer
    whose content must equal ``self.s``.
    """
    source = Babe().pull(stream=StringIO(self.s), format='csv')
    source = source.typedetect()
    source.push_sql(table='test_table', database_kind='vectorwise',
                    database='pybabe_test', drop_table=True,
                    create_table=True)
    reloaded = Babe().pull_sql(database_kind='vectorwise',
                               database='pybabe_test', table='test_table')
    out = StringIO()
    reloaded.push(stream=out, format='csv', delimiter=',')
    self.assertEqual(out.getvalue(), self.s)
def test_tuple(self):
    """Check that ``flatMap`` expands one row into several rows.

    Column ``b`` holds ``:``-separated values; the mapper returns one copy
    of the row per value, so ``1,2:3`` is expected to become ``1,2`` and
    ``1,3``.
    """
    source = Babe().pull(stream=StringIO("a,b\n1,2:3\n4,5:6\n"), format="csv")
    expanded = source.flatMap(
        lambda row: [row._replace(b=i) for i in row.b.split(':')])
    out = StringIO()
    expanded.push(stream=out, format="csv")
    self.assertEqual(out.getvalue(), "a,b\n1,2\n1,3\n4,5\n4,6\n")
def test_rename(self):
    """Check that ``rename(a="c")`` changes only the header of column ``a``."""
    source = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                         format="csv").typedetect()
    renamed = source.rename(a="c")
    out = StringIO()
    renamed.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), 'c,b\n1,2\n3,4\n1,4\n')
def test_min(self):
    """Check ``minN(column='a', n=2)`` on a three-row input.

    The expected output keeps the rows ``1,2`` and ``1,4`` and drops
    ``3,4``.
    """
    source = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                         format="csv").typedetect()
    smallest = source.minN(column='a', n=2)
    out = StringIO()
    smallest.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), 'a,b\n1,2\n1,4\n')
def test_filter_values(self):
    """Check ``filter_values(a=3, b=4)`` on a three-row input.

    Only the row ``3,4`` is expected in the output.
    """
    source = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                         format="csv").typedetect()
    matching = source.filter_values(a=3, b=4)
    out = StringIO()
    matching.push(stream=out, format="csv")
    self.assertEqual(out.getvalue(), "a,b\n3,4\n")
def test_partition_s3(self):
    """Partition ``self.s`` on ``date`` and push it with ``protocol="s3"``.

    The push targets the bucket ``florian-test`` with the filename template
    ``foobar/$date.csv.gz``. The test makes no assertion; it only checks
    that the calls complete.
    """
    partitioned = Babe().pull(
        stream=StringIO(self.s), format='csv'
    ).partition(field='date')
    partitioned.push(
        protocol="s3",
        bucket="florian-test",
        format="csv",
        filename_template='foobar/$date.csv.gz',
    )
def test_transpose(self):
    """Check that ``transpose()`` turns ``self.s`` into ``self.s2``.

    The input is loaded with ``city`` as primary key before transposing.
    """
    transposed = Babe().pull(stream=StringIO(self.s), format='csv',
                             primary_key='city').transpose()
    out = StringIO()
    transposed.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.s2)
def test_multi(self):
    """Check that two chained ``pull`` calls emit both inputs in sequence.

    ``self.s`` is pulled twice on the same chain; the CSV output is expected
    to be ``self.s`` followed by ``self.s``.
    """
    chained = Babe()
    chained = chained.pull(stream=StringIO(self.s), format='csv').pull(
        stream=StringIO(self.s), format='csv')
    out = StringIO()
    chained.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.s + self.s)
def test_user_agent(self):
    """Check ``user_agent`` applied to the ``useragent`` field.

    The call names ``os``, ``browser`` and ``browser_version`` as output
    fields; the string rendering of the result must equal ``self.s2``.
    """
    source = Babe().pull(string=self.s, format="csv")
    parsed = source.user_agent(
        field="useragent",
        output_os="os",
        output_browser="browser",
        output_browser_version="browser_version",
    )
    self.assertEqual(parsed.to_string(), self.s2)
def test_filter2(self):
    """Check that ``filterColumns(remove_fields=['a'])`` drops column ``a``.

    Only column ``b`` is expected in the CSV output.
    """
    source = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                         format="csv").typedetect()
    trimmed = source.filterColumns(remove_fields=['a'])
    out = StringIO()
    trimmed.push(stream=out, format="csv")
    self.assertEqual(out.getvalue(), "b\n2\n4\n4\n")
def test_groupby(self):
    """Check ``group`` with a reducer that sums column ``b`` per key ``a``.

    The two rows with ``a == 1`` are expected to collapse into ``1,6``.
    """
    source = Babe().pull(string='a,b\n1,2\n3,4\n1,4\n',
                         format="csv").typedetect()
    grouped = source.group(
        key="a",
        reducer=lambda key, rows: (key, sum([row.b for row in rows])))
    self.assertEqual(grouped.to_string(), "a,b\n1,6\n3,4\n")
def test_ftp(self):
    """Pull ``tests/test.csv`` and push it as ``test.csv`` with ``protocol='ftp'``.

    The push targets ``localhost`` on ``self.port``. The test makes no
    assertion; it only checks that the calls complete.
    """
    source = Babe().pull('tests/test.csv', name='Test')
    source.push(
        filename='test.csv',
        protocol='ftp',
        host='localhost',
        port=self.port,
    )
def test_bulk(self):
    """Check ``bulkMapTo`` with ``bulk_size=2`` inserting a field ``b``.

    For each chunk the mapper returns, once per row, a one-element list
    holding the sum of column ``a`` over that chunk; the string rendering
    must equal ``self.s2``.
    """
    source = Babe().pull(stream=StringIO(self.s), format="csv")
    source = source.typedetect()
    # The parameter is named `chunk` so it does not shadow the builtin `list`.
    mapped = source.bulkMapTo(
        lambda chunk: [[sum(r.a for r in chunk)]] * len(chunk),
        bulk_size=2,
        insert_fields=["b"])
    self.assertEqual(mapped.to_string(), self.s2)
def test_null(self):
    """Check pulling ``self.s`` with ``null_value="NULL"``.

    The CSV written back to the buffer must equal ``self.s2``.
    """
    source = Babe().pull(stream=StringIO(self.s), format='csv',
                         null_value="NULL")
    out = StringIO()
    # The return value of push() is not needed here.
    source.push(stream=out, format="csv")
    self.assertEqual(out.getvalue(), self.s2)
def test_filter(self):
    """Check ``filter`` with a predicate that keeps rows where ``a == 3``.

    Only the row ``3,4`` is expected in the output.
    """
    source = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                         format="csv").typedetect()
    kept = source.filter(function=lambda x: x.a == 3)
    out = StringIO()
    kept.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), 'a,b\n3,4\n')
def test_replace(self):
    """Check ``mapTo`` with ``fields=['a', 'b']`` replacing the row layout.

    Each row of ``tests/test.csv`` is mapped to ``[foo + 1, bar * 2]``; the
    CSV output is expected to contain only the columns ``a`` and ``b``.
    """
    source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
    mapped = source.mapTo(lambda row: [row.foo + 1, row.bar * 2],
                          fields=['a', 'b'])
    out = StringIO()
    mapped.push(stream=out, format='csv')
    expected = """a,b\n2,4\n4,8\n"""
    self.assertEqual(out.getvalue(), expected)
def test_insert(self):
    """Check ``mapTo`` with ``insert_fields=['fooplus']``.

    Each row of ``tests/test.csv`` gets an extra ``fooplus`` field equal to
    ``foo + 1``; the existing columns are expected to be kept.
    """
    source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
    extended = source.mapTo(lambda row: row.foo + 1,
                            insert_fields=['fooplus'])
    # Expected CSV, one record per line.
    expected = (
        "foo,bar,f,d,fooplus\n"
        "1,2,3.2,2010/10/02,2\n"
        "3,4,1.2,2011/02/02,4\n"
    )
    self.assertEqual(extended.to_string(), expected)
def test_tuple(self):
    """Check ``mapTo`` with a mapper that returns a modified row.

    The mapper uses ``_replace`` to increment ``foo`` by one on every row of
    ``tests/test.csv``; all other columns are expected to be unchanged.
    """
    source = Babe().pull(filename='tests/test.csv', name='Test').typedetect()
    bumped = source.mapTo(lambda obj: obj._replace(foo=obj.foo + 1))
    # Expected CSV, one record per line.
    expected = (
        "foo,bar,f,d\n"
        "2,2,3.2,2010/10/02\n"
        "4,4,1.2,2011/02/02\n"
    )
    self.assertEqual(bumped.to_string(), expected)
def test_sort(self):
    """Sort 10001 generated rows on ``v`` and check the first row.

    Row ``i`` is ``(i, -i)`` for ``i`` in ``0..10000``, so after sorting on
    ``v`` the first row is expected to be ``10000,-10000``.
    """
    babe = Babe()
    s = '\n'.join(['k,v'] + ['%u,%u' % (i, -i) for i in xrange(0, 10001)])
    rows = babe.pull(string=s, name='test', format='csv')
    rows = rows.typedetect()
    rows = rows.sort(field='v')
    rows = rows.head(n=1)
    self.assertEqual(rows.to_string(), 'k,v\n10000,-10000\n')
def test_mail(self):
    """Pull two CSV tables and pass them to ``mail`` with ``in_body=True``.

    ``self.s1`` and ``self.s2`` are pulled with the sources ``Table 1`` and
    ``Table 2``. The test makes no assertion; it only checks that the calls
    complete.
    """
    tables = Babe().pull(
        stream=StringIO(self.s1), source="Table 1", format='csv'
    ).pull(
        stream=StringIO(self.s2), source="Table 2", format='csv'
    )
    tables.mail(
        subject="Test",
        recipients="*****@*****.**",
        in_body=True,
    )
def test_sqldump(self):
    """Check pulling ``self.s`` with ``format='sql'`` and rendering it as CSV.

    The pull names the table ``foobar`` and the fields ``id``, ``number``,
    ``title`` and ``datetime``; the CSV output must equal ``self.s2``.
    """
    dump = Babe().pull(stream=StringIO(self.s), format='sql', table='foobar',
                       fields=['id', 'number', 'title', 'datetime'])
    out = StringIO()
    dump.push(stream=out, format='csv')
    self.assertEqual(out.getvalue(), self.s2)