Ejemplo n.º 1
0
 def test_max(self):
     """Check maxN(column='b', n=2) on a three-row CSV pushed back as CSV.

     The input rows carry ``b`` values 2, 4 and 4; the expected output
     holds only the two rows whose ``b`` is 4, in their input order.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.maxN(column='b', n=2)
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # assertEqual is the supported spelling; the assertEquals alias is
     # deprecated and was removed from unittest in Python 3.12.
     self.assertEqual(buf.getvalue(), 'a,b\n3,4\n1,4\n')
Ejemplo n.º 2
0
def wordcount():
    """Write a CSV of word counts for RFC 1149 to standard output.

    Steps, in order:
      1. pull ``rfc/rfc1149.txt`` from ``www.ietf.org`` over HTTP;
      2. emit a ``(word, 1)`` pair for every run of word characters found
         in each row's ``text``;
      3. group the pairs by ``word`` and sum their ``count`` values;
      4. apply ``maxN(column='count', n=10)``;
      5. push the result to ``sys.stdout`` in CSV format.
    """
    a = Babe().pull(protocol='http',
                    host='www.ietf.org',
                    filename='rfc/rfc1149.txt')
    # Raw string: a plain '\w' is an invalid escape sequence and triggers
    # a warning on modern Python even though the pattern value is the same.
    a = a.flatMap(lambda row: [(w, 1) for w in re.findall(r'\w+', row.text)],
                  columns=['word', 'count'])
    a = a.groupBy(key='word',
                  reducer=lambda word, rows:
                  (word, sum(row.count for row in rows)))
    a = a.maxN(column='count', n=10)
    a.push(stream=sys.stdout, format='csv')
Ejemplo n.º 3
0
def wordcount():
    """Count the words of RFC 1149 and print the maxN result as CSV.

    The text is pulled from ``www.ietf.org`` (``rfc/rfc1149.txt``) over
    HTTP. Each row's ``text`` is split into runs of word characters, one
    ``(word, 1)`` pair per match; pairs are grouped by ``word`` and their
    ``count`` values summed. ``maxN(column='count', n=10)`` is then applied
    and the result pushed to ``sys.stdout`` as CSV.
    """
    a = Babe().pull(protocol='http',
                    host='www.ietf.org',
                    filename='rfc/rfc1149.txt')
    # The pattern is a raw string so that '\w' is not parsed as an
    # (invalid) string escape; the compiled regex is unchanged.
    a = a.flatMap(lambda row: [(w, 1) for w in re.findall(r'\w+', row.text)],
                  columns=['word', 'count'])
    a = a.groupBy(key='word',
                  reducer=lambda word, rows: (word, sum(row.count for row in rows)))
    a = a.maxN(column='count',
               n=10)
    a.push(stream=sys.stdout,
           format='csv')
Ejemplo n.º 4
0
 def test_max(self):
     """Check maxN(column='b', n=2) via the to_string() rendering.

     The input rows carry ``b`` values 2, 4 and 4; the expected string
     holds the header plus the two rows whose ``b`` is 4.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.maxN(column='b', n=2)
     # assertEqual is the supported spelling; the assertEquals alias is
     # deprecated and was removed from unittest in Python 3.12.
     self.assertEqual(a.to_string(), 'a,b\n3,4\n1,4\n')
Ejemplo n.º 5
0
 def test_max(self):
     """Check that maxN with n=2 on column ``b`` yields the expected text.

     Input ``b`` values are 2, 4 and 4; to_string() is expected to return
     the header followed by the rows ``3,4`` and ``1,4``.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     a = a.maxN(column='b', n=2)
     # Use assertEqual: the assertEquals alias is deprecated and no longer
     # exists in unittest from Python 3.12 onwards.
     self.assertEqual(a.to_string(), 'a,b\n3,4\n1,4\n')
Ejemplo n.º 6
0
 def test_max(self):
     """Check that maxN with n=2 on column ``b`` round-trips to CSV.

     Input ``b`` values are 2, 4 and 4; after pushing to an in-memory
     buffer as CSV the content is expected to be the header followed by
     the rows ``3,4`` and ``1,4``.
     """
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     a = a.maxN(column='b', n=2)
     buf = StringIO()
     a.push(stream=buf, format='csv')
     # Use assertEqual: the assertEquals alias is deprecated and no longer
     # exists in unittest from Python 3.12 onwards.
     self.assertEqual(buf.getvalue(), 'a,b\n3,4\n1,4\n')