コード例 #1
0
 def test_filter2(self):
     """Removing column ``a`` via ``remove_fields`` leaves only ``b`` in the CSV output."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(buf.getvalue(), "b\n2\n4\n4\n")
コード例 #2
0
ファイル: tests.py プロジェクト: nizox/PyBabe
 def test_twitter(self):
     """Run the ``pull_twitter`` pipeline and push the result as CSV.

     Smoke test only: the CSV goes to an in-memory buffer and nothing is
     asserted about its contents.
     """
     wanted_fields = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     tweets = (Babe()
               .pull_twitter()
               .filterColumns(keep_fields=wanted_fields)
               .typedetect())
     sink = StringIO()
     tweets.push(stream=sink, format='csv')
コード例 #3
0
 def test_twitter(self):
     """Run the ``pull_twitter`` pipeline and render it with ``to_string``.

     Smoke test only: the rendered string is discarded, so the test passes
     as long as no step raises.
     """
     wanted_fields = [
         "author_name",
         "author_id",
         "author_screen_name",
         "created_at",
         "hashtags",
         "text",
         "in_reply_to_status_id_str",
     ]
     tweets = Babe().pull_twitter()
     tweets = tweets.filterColumns(keep_fields=wanted_fields).typedetect()
     tweets.to_string()
コード例 #4
0
 def test_filter2(self):
     """Removing column ``a`` via ``remove_fields`` leaves only ``b`` in ``to_string()``."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'),
                     format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
コード例 #5
0
ファイル: test_transform.py プロジェクト: fdouetteau/PyBabe
 def test_filter2(self):
     """Removing column ``a`` via ``remove_fields`` leaves only ``b`` in ``to_string()``."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(a.to_string(), "b\n2\n4\n4\n")
コード例 #6
0
    def test_gs_load_from_kontagent(self):
        """Pull a one-hour Kontagent window, push it as CSV, then push to BigQuery.

        The stream is pulled with ``pull_kontagent``, decoded and filtered,
        written as a tab-delimited CSV using the ``gs`` protocol, and finally
        handed to ``push_bigquery`` with an explicit table schema. Nothing is
        asserted; the test passes as long as no step raises.
        """
        bucket = 'bertrandtest'
        game = 'wordox'
        day = '20151021'
        hour = '14'
        table_name = '_'.join((game, day))
        filename = table_name + hour + '.csv'

        # One-hour window starting at `hour` on `day`.
        start_time = datetime.strptime(day + ' ' + hour, '%Y%m%d %H')
        end_time = start_time + timedelta(hours=1)

        # (name, type) pairs; the first group is marked REQUIRED, the second
        # group carries no explicit mode.
        required_columns = [
            ("date", "STRING"),
            ("hour", "INTEGER"),
            ("time", "TIMESTAMP"),
            ("name", "STRING"),
        ]
        optional_columns = [
            ("uid", "INTEGER"),
            ("st1", "STRING"),
            ("st2", "STRING"),
            ("st3", "STRING"),
            ("channel_type", "STRING"),
            ("value", "INTEGER"),
            ("level", "INTEGER"),
            ("recipients", "STRING"),
            ("tracking_data", "STRING"),
            ("data", "STRING"),
        ]
        schema = [
            {"name": col_name, "type": col_type, "mode": "REQUIRED"}
            for col_name, col_type in required_columns
        ]
        schema += [
            {"name": col_name, "type": col_type}
            for col_name, col_type in optional_columns
        ]

        # NOTE(review): the application id is hard-coded here; consider
        # moving it to configuration.
        a = Babe().pull_kontagent(start_time=start_time,
                                  sample_mode=False,
                                  end_time=end_time,
                                  KT_APPID='869fb4a24faa4c61b702ea137cbe16ad',
                                  discard_names=["PointSend"])
        a = a.mapTo(decode_data, insert_fields=["decoded_data"])
        a = a.filterColumns(keep_fields=v1)
        a = a.filter(lambda row: uid_type_check(row) is True)

        a.push(filename=filename,
               format='csv',
               delimiter='\t',
               quotechar='|',
               encoding='utf8',
               bucket=bucket,
               protocol='gs')

        # NOTE(review): formatting datetimes this way yields spaces and
        # colons in the job id -- confirm push_bigquery accepts that.
        job_id = '{}_{}'.format(start_time, end_time)
        a.push_bigquery(filename=filename,
                        bucket=bucket,
                        project_id='bigquery-testing-1098',
                        dataset_id='ladata',
                        table_name=table_name,
                        schema=schema,
                        job_id=job_id,
                        num_retries=5)
コード例 #7
0
ファイル: tests.py プロジェクト: nizox/PyBabe
 def test_filter2(self):
     """Removing column ``a`` via ``remove_fields`` leaves only ``b`` in the CSV output."""
     a = Babe().pull(stream=StringIO('a,b\n1,2\n3,4\n1,4\n'), format="csv").typedetect()
     a = a.filterColumns(remove_fields=['a'])
     buf = StringIO()
     a.push(stream=buf, format="csv")
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(buf.getvalue(), "b\n2\n4\n4\n")