def __init__(self, path, flag='_SUCCESS', client=None, gcs_client=None):
    """Record the path and clients, and build the backing GCS target.

    Args:
        path: Location to wrap. May contain a ``*`` wildcard, but only in
            the final (filename) component.
        flag: Name of the marker file that is targeted instead of ``path``
            when ``path`` contains a wildcard.
        client: Stored on ``self.client``; a new ``BigqueryClient()`` is
            created when this is falsy.
        gcs_client: Forwarded as ``client`` to ``gcs.GCSTarget``.

    Raises:
        AssertionError: If the directory part of ``path`` contains ``*``.
    """
    self.client = client or BigqueryClient()
    self.path = path
    self.wildcard = '*' in path

    if not self.wildcard:
        # Plain path: the GCS target is the path itself.
        self._target = gcs.GCSTarget(path, client=gcs_client)
        return

    # Wildcard path: point the target at the flag file that sits next to
    # the matched files rather than at the pattern.
    parent = os.path.dirname(path)
    # NOTE(review): assert statements are skipped under ``python -O``, so
    # this check is not enforced in optimised runs.
    assert '*' not in parent, \
        'Wildcard (*) only supported in filename %s' % (path,)
    flag_path = os.path.join(parent, flag)
    self._target = gcs.GCSTarget(flag_path, client=gcs_client)
def test_get_target_path(self):
    """Check get_target_path for a BigQuery target, a GCS target and a non-target."""
    bq_path = SimpleTestTask.get_target_path(
        bigquery.BigQueryTarget("p", "d", "t", client="fake_client"))
    self.assertEqual(bq_path, "p:d.t")

    gcs_path = SimpleTestTask.get_target_path(
        gcs.GCSTarget("gs://foo/bar.txt", client="fake_client"))
    self.assertEqual(gcs_path, "gs://foo/bar.txt")

    # A plain string is not a target object and must raise ValueError.
    self.assertRaises(ValueError, SimpleTestTask.get_target_path, "not_a_target")
def output(_):
    """Return a GCS target for ``self.gcs_dir_url``.

    The single positional argument is ignored. ``self`` is not a parameter
    here; it is resolved from the surrounding scope, which is not visible
    in this snippet.
    """
    target_url = self.gcs_dir_url
    return gcs.GCSTarget(target_url)
def create_target(self, format=None):
    """Build a GCS target whose URL is derived from ``self.id()``.

    Args:
        format: Forwarded unchanged as the ``format`` keyword of
            ``gcs.GCSTarget``.

    Returns:
        A ``gcs.GCSTarget`` at ``bucket_url(self.id())`` that uses
        ``self.client``.
    """
    target_url = bucket_url(self.id())
    return gcs.GCSTarget(target_url, format=format, client=self.client)
def run(self):
    """Copy the local file at ``self.source`` to the GCS object at ``self.destination``.

    The source is opened in text mode and read in full, then written to the
    target in a single ``write`` call.
    """
    # Source is opened first and closed last, matching nested ``with`` blocks.
    with open(self.source, 'r') as src, \
            gcs.GCSTarget(self.destination, client=self.client).open(mode='w') as dst:
        payload = src.read()
        dst.write(payload)
def output(self):
    """Return the GCS target at ``self.destination``, using ``self.client``."""
    target_kwargs = {'client': self.client}
    return gcs.GCSTarget(self.destination, **target_kwargs)
def output(self):
    """Return the GCS target for this task's dated CSV file.

    The path has the form
    ``<BUCKET_PATH>/<BUCKET_SUBDIR>/data/<YYYY-MM-DD>.csv``, where the date
    component is ``self.date`` rendered with ``%Y-%m-%d``.
    """
    # presumably self.date is a date/datetime, since the template applies a
    # strftime-style format spec to it; confirm against the parameter.
    template = '{}/{}/data/{date:%Y-%m-%d}.csv'
    csv_path = template.format(BUCKET_PATH, BUCKET_SUBDIR, date=self.date)
    return luigi_gcs.GCSTarget(csv_path, client=GCS_CLIENT)
def output(self):
    """Return the GCS target for this task's gzipped JSON file.

    The object is ``gs://snowplow_tracker/<file_root>_<YYYYMMDD>.json.gz``,
    where the date stamp comes from ``self.dataset_date``.
    """
    # Fix: the pattern used to be "%Y$m%d". "$m" is not a strftime
    # directive, so the month was never substituted and a literal "$m" was
    # embedded in the object name. "%m" gives the zero-padded month.
    # NOTE(review): objects already written under the old "$m" name will no
    # longer match this target; confirm whether any need renaming.
    return gcs.GCSTarget(
        "gs://snowplow_tracker/%s_%s.json.gz"
        % (self.file_root, self.dataset_date.strftime("%Y%m%d")))