def add_pyspark(request):
    """Show or process the form that adds a PySpark step to the flow.

    Non-POST requests get an unbound ``AddPySparkForm``. A POST that fails
    validation gets the bound form rendered again. A valid POST writes a
    starter file at the submitted path, registers the step through
    ``api.add_pyspark_step``, stores the returned flow and redirects to
    ``/dag/view``.
    """
    template_name = 'dag/add_pyspark.html'

    if request.method != 'POST':
        blank_form = AddPySparkForm(flow=get_flow())
        return render(request, template_name, {'form': blank_form})

    form = AddPySparkForm(request.POST, flow=get_flow())
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    cleaned = form.cleaned_data
    step_name = cleaned['step_name']
    step_description = cleaned['step_description']
    depends_ons = cleaned['depends_ons']
    pyspark_file_path = cleaned['pyspark_file_path']

    # Seed the new file with a body that returns the first dependency's
    # dataframe variable.
    create_file(pyspark_file_path, f'return {depends_ons[0]}_df')

    set_flow(
        api.add_pyspark_step(
            flow=get_flow(),
            name=step_name,
            depends_on=depends_ons,
            description=step_description,
            pyspark_file_path=pyspark_file_path,
        )
    )
    return HttpResponseRedirect('/dag/view')
def run_sql_submit(request):
    """Run the SQL submitted from the editor and render a preview.

    Non-POST requests render ``run/run_sql.html`` with an unbound
    ``RunSqlForm``; a POST that fails validation renders it with the bound
    form. For a valid POST the submitted SQL is stored on the step's
    ``sql_from_editor`` attribute, the step's dataframe is requested from the
    flow, and the column names plus the result of ``take(10)`` are passed to
    the template. Any exception raised while producing the preview is
    converted to ``error_message`` instead of propagating.
    """
    if request.method != 'POST':
        return render(request, 'run/run_sql.html', {'form': RunSqlForm()})

    form = RunSqlForm(request.POST)
    if not form.is_valid():
        return render(request, 'run/run_sql.html', {'form': form})

    env = DataforjEnv(get_flow().name, 'local')
    step_name = form.cleaned_data['step_name']
    sql_text = form.cleaned_data['sql']
    get_flow()._steps[step_name].sql_from_editor = sql_text

    # Start from the "nothing to show" state and fill in whatever succeeds.
    context = {
        'step_name': step_name,
        'sql_text': sql_text,
        'error_message': None,
        'header_columns': None,
        'rows': None,
    }
    try:
        spark_df = get_flow().dataframe_for_step(env, step_name)
        context['header_columns'] = spark_df.schema.names
        context['rows'] = spark_df.rdd.map(
            lambda row: row.asDict(True)
        ).take(10)
    except Exception as exc:
        # Failures are reported on the page rather than raised.
        context['error_message'] = str(exc)

    page = loader.get_template('run/run_sql.html')
    return HttpResponse(page.render(context, request))
def add_sql(request):
    """Show or process the form that adds a SQL step to the flow.

    Non-POST requests get an unbound ``AddSqlForm``. A POST that fails
    validation gets the bound form rendered again. A valid POST writes a
    starter query selecting every column from every dependency to the
    submitted file path, registers the step through ``api.add_sql_step``,
    stores the returned flow and redirects to ``/dag/view``.
    """
    template_name = 'dag/add_sql.html'

    if request.method != 'POST':
        blank_form = AddSqlForm(flow=get_flow())
        return render(request, template_name, {'form': blank_form})

    form = AddSqlForm(request.POST, flow=get_flow())
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    cleaned = form.cleaned_data
    step_name = cleaned['step_name']
    step_description = cleaned['step_description']
    depends_ons = cleaned['depends_ons']

    # Each dependency appears in the FROM clause aliased to its own name and
    # contributes "<name>.*" to the select list.
    from_items = [f'{table} {table}' for table in depends_ons]
    select_items = [f'{table}.*' for table in depends_ons]
    sql_table_names = ', '.join(from_items)
    sql_select = ', '.join(select_items)
    sql_query = f'select {sql_select} from {sql_table_names}'

    sql_file_path = cleaned['sql_file_path']
    create_file(sql_file_path, sql_query)

    set_flow(
        api.add_sql_step(
            flow=get_flow(),
            name=step_name,
            depends_on=depends_ons,
            description=step_description,
            sql_file_path=sql_file_path,
        )
    )
    return HttpResponseRedirect('/dag/view')
def add_sink(request):
    """Show or process the form that adds a sink step to the flow.

    Non-POST requests get an unbound ``AddSinkForm``. A POST that fails
    validation gets the bound form rendered again. A valid POST registers the
    sink through ``api.add_sink_step``, stores the returned flow and
    redirects to ``/dag/view``.
    """
    template_name = 'dag/add_sink.html'

    if request.method != 'POST':
        blank_form = AddSinkForm(flow=get_flow())
        return render(request, template_name, {'form': blank_form})

    form = AddSinkForm(request.POST, flow=get_flow())
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    cleaned = form.cleaned_data
    step_name = cleaned['step_name']
    step_description = cleaned['step_description']
    depends_ons = cleaned['depends_ons']
    uri = cleaned['uri']
    format_type = cleaned['format_type']
    mode = cleaned['mode']
    options = parse_options(cleaned['options_text'])

    # NOTE(review): the dependency value is wrapped in a one-element list,
    # unlike the PySpark/SQL views which pass it through unchanged --
    # presumably this form yields a single step name; confirm against
    # AddSinkForm.
    updated_flow = api.add_sink_step(
        flow=get_flow(),
        name=step_name,
        depends_on=[depends_ons],
        description=step_description,
        uri=uri,
        format_type=format_type,
        mode=mode,
        options=options,
    )
    set_flow(updated_flow)
    return HttpResponseRedirect('/dag/view')
def run_step(step_name: str, request):
    """Dispatch a step run to the handler matching the step's type.

    The type is whatever the step's ``dagre_type()`` returns: ``'SQL'`` goes
    to ``run_sql``, ``'PySpark'`` to ``run_pyspark`` and anything else to
    ``run_basic_step``. The handler's return value is returned unchanged.
    """
    step_type = get_flow()._steps[step_name].dagre_type()
    print(f'step_type = [{step_type}]')

    if step_type == 'SQL':
        handler = run_sql
    elif step_type == 'PySpark':
        handler = run_pyspark
    else:
        handler = run_basic_step
    return handler(step_name, request)
def view(request):
    """Render the DAG page for the current flow.

    The template receives the flow's name and description, two JSON strings
    mapping step name to ``dagre_type()`` and to description, and the output
    of the flow's ``to_dagre_nodes_edges()``.
    """
    types_by_step = {}
    for step in get_flow()._steps.values():
        types_by_step[step.name] = step.dagre_type()

    descriptions_by_step = {}
    for step in get_flow()._steps.values():
        descriptions_by_step[step.name] = step.description

    types_json = json.dumps(types_by_step)
    descriptions_json = json.dumps(descriptions_by_step)

    context = {
        'name': get_flow().name,
        'description': get_flow().description,
        'type_dict_dson': types_json,
        'desc_dict_dson': descriptions_json,
        'dagre_nodes_edges_js': get_flow().to_dagre_nodes_edges(),
    }
    page = loader.get_template('dag/view.html')
    return HttpResponse(page.render(context, request))
def run_basic_step(step_name: str, request):
    """Run a step and render a preview of its dataframe.

    The step's dataframe is requested from the current flow; its column names
    and the result of ``take(10)`` are passed to ``run/run_step.html``. Any
    exception raised while producing the preview is converted to
    ``error_message`` instead of propagating.
    """
    env = DataforjEnv(get_flow().name, 'local')

    # Start from the "nothing to show" state and fill in whatever succeeds.
    context = {
        'error_message': None,
        'header_columns': None,
        'rows': None,
    }
    try:
        spark_df = get_flow().dataframe_for_step(env, step_name)
        context['header_columns'] = spark_df.schema.names
        context['rows'] = spark_df.rdd.map(
            lambda row: row.asDict(True)
        ).take(10)
    except Exception as exc:
        # Failures are reported on the page rather than raised.
        context['error_message'] = str(exc)

    page = loader.get_template('run/run_step.html')
    return HttpResponse(page.render(context, request))
def delete_step(request):
    """Remove the step named in a POSTed form, then go back to the DAG view.

    For a POST whose ``SelectedStepForm`` validates, the named step is
    removed through ``api.remove_step`` and the returned flow is stored.
    Every request -- valid POST, invalid POST, or any other method -- ends
    with a redirect to ``/dag/view``.

    Returns:
        An ``HttpResponseRedirect`` to ``/dag/view``.
    """
    if request.method == 'POST':
        form = SelectedStepForm(request.POST)
        if form.is_valid():
            step_name = form.cleaned_data['step_name']
            updated_flow = api.remove_step(flow=get_flow(), name=step_name)
            set_flow(updated_flow)

    # Single exit point: a POST that fails validation could otherwise fall
    # off the end of the view and return None, which Django rejects.
    return HttpResponseRedirect('/dag/view')
def add_source(request):
    """Show or process the form that adds a source step to the flow.

    Non-POST requests get an unbound ``AddSourceForm``. A POST that fails
    validation gets the bound form rendered again. A valid POST parses the
    submitted options text, registers the source through
    ``api.add_source_step``, stores the returned flow and redirects to
    ``/dag/view``.
    """
    template_name = 'dag/add_source.html'

    if request.method != 'POST':
        return render(request, template_name, {'form': AddSourceForm()})

    form = AddSourceForm(request.POST)
    if not form.is_valid():
        return render(request, template_name, {'form': form})

    cleaned = form.cleaned_data
    options_dict = parse_options(cleaned['options_text'])
    set_flow(
        api.add_source_step(
            flow=get_flow(),
            name=cleaned['step_name'],
            description=cleaned['step_description'],
            uri=cleaned['uri'],
            format_type=cleaned['format_type'],
            options=options_dict,
        )
    )
    return HttpResponseRedirect('/dag/view')