Code example #1
0
File: test_cli.py  Project: blawson/dataforj
 def test_init(self):
     """`cli.init` should write a project file equivalent to an empty flow."""
     cli.init(dir_path, 'ut', 'desc')
     with open(file_name, 'r+') as project_file:
         yaml_text = '\n'.join(project_file.readlines())
         actual = dataflow.from_yaml(yaml_text)
         expected = Dataflow.from_python_objects('ut', 'desc', [])
         self.assertEqual(actual._steps, expected._steps)
Code example #2
0
File: cli.py  Project: blawson/dataforj
def add_source_step(dir: str, name: str, uri: str, format_type: str):
    """Add a source step to the Dataforj project in *dir* and persist it.

    Loads ``{dir}/dataforj.yaml``, appends a source step via
    ``api.add_source_step`` and rewrites the file in place.

    Raises:
        Exception: if no ``dataforj.yaml`` exists in *dir*.
    """
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')
    with open(file_name, 'r+') as f:
        # f.read() preserves the file verbatim. The previous
        # '\n'.join(f.readlines()) doubled every newline, because
        # readlines() already keeps the trailing '\n' on each line.
        yaml = f.read()
        flow = dataflow.from_yaml(yaml)
        updated_flow = api.add_source_step(flow, name, uri, format_type)
        # Rewrite the file in place with the updated flow definition;
        # truncate() drops any leftover bytes if the new YAML is shorter.
        f.seek(0)
        f.write(updated_flow.to_yaml())
        f.truncate()
Code example #3
0
File: cli.py  Project: blawson/dataforj
def debug_step(dir: str, env_name: str, step: str):
    """Run a single flow step in debug mode for the project in *dir*.

    Loads ``{dir}/dataforj.yaml``, substitutes environment variables from
    the *env_name* configuration, then delegates to ``flow.debug_step``.

    Raises:
        Exception: if no ``dataforj.yaml`` exists in *dir*.
    """
    project_file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(project_file_name):
        raise Exception(f'There is no Dataforj project in the directory \
                          [{dir}]')
    # Read-only access: unlike the add_*_step commands nothing is written
    # back, so open with 'r' rather than 'r+'.
    with open(project_file_name, 'r') as project_file:
        # f.read() preserves the file verbatim; '\n'.join(readlines())
        # doubled every newline because readlines() keeps the trailing '\n'.
        project_yaml = project_file.read()
        # NOTE(review): 'flow.name' is a literal string, not the flow's
        # actual name — confirm what DataforjEnv expects as first argument.
        env = DataforjEnv('flow.name', env_name)
        # Substitute {placeholders} in the YAML with per-environment values.
        yaml_plus_vars = project_yaml \
            .format_map(env.env_config['dataflow-config'])
        flow = dataflow.from_yaml(yaml_plus_vars)
        flow.debug_step(env, step)
Code example #4
0
File: cli.py  Project: blawson/dataforj
def add_pyspark_step(dir: str, name: str, depends_on: list,
                     pyspark_file_path: str):
    """Add a PySpark step to the Dataforj project in *dir* and persist it.

    Loads ``{dir}/dataforj.yaml``, appends a PySpark step via
    ``api.add_pyspark_step`` and rewrites the file in place.

    Raises:
        Exception: if no ``dataforj.yaml`` exists in *dir*.
    """
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')
    with open(file_name, 'r+') as f:
        # f.read() preserves the file verbatim. The previous
        # '\n'.join(f.readlines()) doubled every newline, because
        # readlines() already keeps the trailing '\n' on each line.
        yaml = f.read()
        flow = dataflow.from_yaml(yaml)
        updated_flow = api.add_pyspark_step(flow, name, depends_on,
                                            pyspark_file_path)
        # Rewrite the file in place with the updated flow definition;
        # truncate() drops any leftover bytes if the new YAML is shorter.
        f.seek(0)
        f.write(updated_flow.to_yaml())
        f.truncate()
Code example #5
0
File: cli.py  Project: blawson/dataforj
def open_flow(dir: str, env_name: str):
    '''
    Open the Dataforj project in the directory provided and return its flow.

    Loads ``{dir}/dataforj.yaml``, substitutes environment variables from
    the *env_name* configuration and parses the result into a flow object.

    Raises:
        Exception: if no ``dataforj.yaml`` exists in *dir*.
    '''
    file_name = f'{dir}/dataforj.yaml'
    if not os.path.exists(file_name):
        raise Exception('There is no Dataforj project in this directory')
    # Read-only access: nothing is written back, so 'r' rather than 'r+'.
    with open(file_name, 'r') as f:
        # f.read() preserves the file verbatim; '\n'.join(readlines())
        # doubled every newline because readlines() keeps the trailing '\n'.
        yaml = f.read()
        # NOTE(review): 'flow.name' is a literal string, not the flow's
        # actual name — confirm what DataforjEnv expects as first argument.
        env = DataforjEnv('flow.name', env_name)
        # Substitute {placeholders} in the YAML with per-environment values.
        yaml_plus_vars = yaml \
            .format_map(env.env_config['dataflow-config'])
        flow = dataflow.from_yaml(yaml_plus_vars)
        return flow
Code example #6
0
 def test_from_yaml(self):
     """A flow parsed from YAML should round-trip to the same YAML."""
     parsed = dataflow.from_yaml(simple_yaml_text)
     self.assertEqual(parsed.to_yaml(), flow_simple.to_yaml())