def test_s3(self):
    """Push a two-row CSV with protocol='gs', then call push_bigquery on it.

    The CSV is built from an in-memory string, written to the
    'bertrandtest' bucket as tab-delimited text, and the same
    filename/bucket pair is then handed to push_bigquery together with a
    two-column schema. The test makes no assertions; it passes as long as
    none of the calls raise.

    NOTE(review): the method name says "s3" but the push uses
    protocol='gs' -- confirm whether the name is just historical.
    NOTE(review): the schema column names ("entier", "string") differ from
    the CSV header ("a", "b"); presumably the load is positional -- verify.
    """
    csv_text = "a,b\n1,a\n3,b\n"
    remote_name = 'tests/test_bq.csv'
    table_schema = [
        {"name": "entier", "type": "INTEGER", "mode": "REQUIRED"},
        {"name": "string", "type": "STRING", "mode": "REQUIRED"},
    ]

    # Stage the data as a remote CSV file first.
    source = Babe().pull(string=csv_text, format='csv', name='Test')
    source.push(
        filename=remote_name,
        format='csv',
        delimiter='\t',
        quotechar='|',
        encoding='utf8',
        bucket='bertrandtest',
        protocol='gs',
    )

    # A fresh Babe instance issues the BigQuery push for the staged file.
    loader = Babe()
    loader.push_bigquery(
        filename=remote_name,
        bucket='bertrandtest',
        project_id='bigquery-testing-1098',
        dataset_id='ladata',
        table_name='tests',
        schema=table_schema,
    )
def test_gs_load_from_kontagent(self):
    """Pull one hour of Kontagent data, stage it as CSV, and push it to BigQuery.

    The window starts at 2015-10-21 14:00 and ends one hour later. Pulled
    rows go through decode_data (which adds a "decoded_data" field), are
    reduced to the columns listed in v1, and are kept only when
    uid_type_check(row) returns exactly True. The result is pushed as a
    tab-delimited CSV with protocol='gs' and then passed to push_bigquery
    with an explicit schema. The test makes no assertions; it passes as
    long as none of the calls raise.
    """
    bucket = 'bertrandtest'
    game = 'wordox'
    day = '20151021'
    hour = '14'

    # Table is named per game and day; the staged file also carries the hour.
    table_name = '{}_{}'.format(game, day)
    filename = '{}.csv'.format(table_name + hour)

    # One-hour export window starting at the configured day and hour.
    start_time = datetime.strptime(day + ' ' + hour, '%Y%m%d %H')
    end_time = start_time + timedelta(hours=1)

    bigquery_schema = [
        {"name": "date", "type": "STRING", "mode": "REQUIRED"},
        {"name": "hour", "type": "INTEGER", "mode": "REQUIRED"},
        {"name": "time", "type": "TIMESTAMP", "mode": "REQUIRED"},
        {"name": "name", "type": "STRING", "mode": "REQUIRED"},
        {"name": "uid", "type": "INTEGER"},
        {"name": "st1", "type": "STRING"},
        {"name": "st2", "type": "STRING"},
        {"name": "st3", "type": "STRING"},
        {"name": "channel_type", "type": "STRING"},
        {"name": "value", "type": "INTEGER"},
        {"name": "level", "type": "INTEGER"},
        {"name": "recipients", "type": "STRING"},
        {"name": "tracking_data", "type": "STRING"},
        {"name": "data", "type": "STRING"},
    ]

    events = (
        Babe()
        .pull_kontagent(
            start_time=start_time,
            sample_mode=False,
            end_time=end_time,
            KT_APPID='869fb4a24faa4c61b702ea137cbe16ad',
            discard_names=["PointSend"],
        )
        .mapTo(decode_data, insert_fields=["decoded_data"])
        .filterColumns(keep_fields=v1)
        # "is True" is deliberate: truthy non-bool results are rejected too.
        .filter(lambda row: uid_type_check(row) is True)
    )

    events.push(
        filename=filename,
        format='csv',
        delimiter='\t',
        quotechar='|',
        encoding='utf8',
        bucket=bucket,
        protocol='gs',
    )

    # NOTE(review): this job_id is built from str(datetime) values, so it
    # contains spaces and colons (e.g. "2015-10-21 14:00:00_..."). BigQuery
    # job IDs are documented as letters, digits, underscores and dashes
    # only -- confirm whether push_bigquery sanitizes the value.
    events.push_bigquery(
        filename=filename,
        bucket=bucket,
        project_id='bigquery-testing-1098',
        dataset_id='ladata',
        table_name=table_name,
        schema=bigquery_schema,
        job_id='{}_{}'.format(start_time, end_time),
        num_retries=5,
    )