def post(self):
    """Handle a POST that submits an assessment job.

    The JSON body is validated with ``verify_config_json`` and stored through
    ``make_config_file``; the job itself is launched by ``submit_assessment``.

    Returns:
        A JSON ``Response`` holding the job and config-file URIs on success,
        otherwise the result of ``gen_error_response``:
        400 for a missing/empty or invalid JSON body, 401 when the
        ``Authorization`` header is absent or the token is rejected,
        403 when the user may not use the resource, 500 when the job could
        not be submitted.
    """
    request_json = request.get_json() or None  # JSON data of the POST request
    if not request_json:
        return gen_error_response(400, "Malformed request")

    # Use .get(): indexing the headers raises when the header is missing, so
    # the 401 answer below would never be produced for that case.
    token = request.headers.get('Authorization')
    if not token:
        return gen_error_response(
            401,
            "Authenticate at https://eubrabigsea.dei.uc.pt/engine/api/checkin_data",
            "DQAssessment")

    username, description = verify_token(token)
    if not username:
        # The user is not logged correctly in the system
        return gen_error_response(401, description, "DQAssessment")

    # TODO: change to spark_assessment
    is_authorized, description = verify_resource(
        username, token, "DQAssessment", "DataQuality")
    if not is_authorized:
        # The user is not authorized to use the resource
        return gen_error_response(403, description, "DQAssessment")

    is_ok, description = verify_config_json(request_json)
    if not is_ok:
        return gen_error_response(400, description)

    file_id, new = make_config_file(request)
    if new:
        # Only a newly created configuration file gets recorded.
        cur = get_db().cursor()
        try:
            cur.execute(
                'INSERT INTO assessment_file VALUES(?,?,CURRENT_TIMESTAMP);',
                (username, file_id))
        finally:
            cur.close()

    pid = submit_assessment(file_id, username)
    if not pid:
        # submit_assessment returns None on failure; answer explicitly
        # instead of letting the view return None.
        return gen_error_response(500, "The assessment could not be submitted")

    cur = get_db().cursor()
    try:
        cur.execute('INSERT INTO requests_assessment VALUES(?,?);',
                    (pid, file_id))
    finally:
        cur.close()

    body = {
        "response": "Job sucsesfully submitted",
        "job_uri": url_for('assessment', uuid=pid),
        "config_file_uri": url_for('config', file_id=file_id),
    }
    return Response(response=json.dumps(body), mimetype="application/json")
def get(self, file_id):
    """Show the configuration file identified by ``file_id``.

    The file is read from ``HDFSCONFIGDIR`` through ``cat_file``. When the
    request carries ``Content-Type: text/plain`` the raw content is returned;
    otherwise the ``config_file.html`` template is rendered with the content,
    the id and the uuids of the requests in ``requests_assessment`` that use
    this file.

    Any exception raised along the way is printed and answered with a 404
    "No such file" error response.
    """
    file_path = "%s/%s.txt" % (HDFSCONFIGDIR, file_id)
    try:
        content = cat_file(file_path)

        if request.headers.get('Content-Type') == 'text/plain':
            return content

        cursor = get_db().cursor()
        rows = cursor.execute(
            'SELECT uuid FROM requests_assessment WHERE file_id = ?',
            (str(file_id), )).fetchall()

        config_file = {
            'data': content,
            'id': file_id,
            'used_by': [row[0] for row in rows],
        }
        page = render_template('config_file.html', config_file=config_file)
        return make_response(page, 200, {'Content-Type': 'text/html'})
    except Exception as e:
        print(e)
        return gen_error_response(404, "No such file")
def get(self, uuid=None):
    """Render the detail page of the profiling request ``uuid``.

    Returns a 400 error response when no uuid is given and a 404 ('Wrong
    uuid') when the uuid has no row in ``requests`` or in
    ``requests_profiling``. Otherwise ``profiling_detail.html`` is rendered
    with the request columns and the JSON-decoded arguments.
    """
    if not uuid:
        return gen_error_response(400, "You should provide a valid uuid")

    cursor = get_db().cursor()
    request_rows = cursor.execute(
        'SELECT * FROM requests WHERE uuid = ?', (uuid, )).fetchall()
    if not request_rows:
        return gen_error_response(404, 'Wrong uuid')

    detail_rows = cursor.execute(
        'SELECT * FROM requests_profiling WHERE uuid = ?',
        (uuid, )).fetchall()
    if not detail_rows:
        return gen_error_response(404, 'Wrong uuid')

    row = request_rows[0]
    # Second column of requests_profiling holds the JSON-encoded arguments.
    profiling_data = {
        'user_id': row[0],
        'type': row[1],
        'uuid': row[2],
        'done': row[3],
        'submit_timestamp': row[4],
        'completed_timestamp': row[5],
        'args_dict': json.loads(detail_rows[0][1]),
        'error': row[6],
    }
    page = render_template('profiling_detail.html',
                           profiling_data=profiling_data)
    return make_response(page, 200, {'Content-Type': 'text/html'})
def submit_assessment(file_id, username):
    """Launch the assessment job through spark-submit and record the request.

    Args:
        file_id: Unique identifier of a configuration file (uuid4); the
            command points at ``<HDFSURL>/<HDFSCONFIGDIR>/<file_id>.txt``.
        username: The username of the user that is requesting the assessment.

    Returns:
        The unique identifier of the request (uuid4 string), or None if
        building or launching the command raised. A failure while inserting
        the row in ``requests`` is only printed; the identifier is still
        returned.
    """
    try:
        spark_submit = '/spark/spark-2.2.0-bin-hadoop2.7/bin/spark-submit --master spark://master:7077 /bigSEA/DQAssessment.py '
        config_path = '%s/%s/%s.txt' % (HDFSURL, HDFSCONFIGDIR, file_id)
        args = shlex.split('%s %s 1' % (spark_submit, config_path))
        print(args)

        pid = str(uuid.uuid4())
        # on_assessment_done is handed to popen_and_call together with the
        # request id and the command.
        popen_and_call(on_assessment_done, pid, args)

        try:
            cursor = get_db().cursor()
            cursor.execute(
                'INSERT INTO requests VALUES (?,"ASSESSMENT",?, 0,CURRENT_TIMESTAMP,? , 0);',
                (username, str(pid), None))
            cursor.close()
        except Exception as db_error:
            print(db_error)
        return pid
    except Exception as error:
        print(error)
        return None
def init():
    """Run the schema script on the database and create the working dirs."""
    connection = get_db()
    with app.open_resource(DATABASE_SCHEMA, mode='r') as schema_file:
        schema_sql = schema_file.read()
        connection.cursor().executescript(schema_sql)
    create_log_dir()
    create_config_dir()
    print("Database initialized and dir created.")
def submit_profiling(request, username):
    """Launch the profiling job through spark-submit and record the request.

    Args:
        request: The flask request that generated the profiling request. Its
            JSON body must contain "output", "timeliness", "time_format" and
            "consistency_rules", plus either "input" or the pattern fields
            ("pattern", "from", "to", "interval") accepted by
            ``isSetPattern``.
        username: The username of the user that is requesting the profiling.

    Returns:
        The unique identifier of the request (uuid4 string), or None if
        building or launching the command raised. A failure while inserting
        the row in ``requests`` is only printed; the identifier is still
        returned.
    """
    try:
        profiler = '/spark/spark-2.2.0-bin-hadoop2.7/bin/spark-submit --master spark://master:7077 /bigSEA/DQProfiling.py '
        # Named "payload" so the json module is not shadowed inside the body.
        payload = request.get_json()

        interval_string = ""
        if isSetPattern(payload):
            input_file = "--pattern=" + payload["pattern"]
            interval_string = (payload["from"] + ";" + payload["to"] + ";" +
                               payload["interval"])
        elif "input" in payload:
            input_file = payload["input"]
        else:
            # Fail explicitly instead of through an unbound local below.
            raise ValueError(
                'the request has neither a pattern nor an "input" field')

        time_format = payload["time_format"]
        consistency_rules = payload["consistency_rules"]
        for name, value in (("time_format", time_format),
                            ("consistency_rules", consistency_rules)):
            # Non-text values were rejected by the string concatenation
            # before; keep rejecting them now that they bypass it.
            if not isinstance(value, str):
                raise TypeError("%s must be a string" % name)

        # time_format and consistency_rules are passed as single, verbatim
        # arguments. Wrapping them in '"' and re-parsing the whole command
        # with shlex mangled (or failed on) values containing quotes.
        # NOTE(review): the other fields are still whitespace-split as
        # before - confirm that is intended.
        leading = (profiler + " " + input_file + " " + payload["output"] +
                   " " + payload["timeliness"])
        args = (shlex.split(leading) + [time_format, consistency_rules] +
                shlex.split(interval_string))
        print(args)

        pid = str(uuid.uuid4())
        popen_and_call(on_profiling_done, pid, args)

        try:
            cur = get_db().cursor()
            cur.execute(
                'INSERT INTO requests VALUES (?,"PROFILING",?, 0,CURRENT_TIMESTAMP, ?, 0);',
                (username, str(pid), None))
            cur.close()
        except Exception as e:
            print(e)
        return pid
    except Exception as e:
        print(e)
        return None
def get(self):
    """Render the list of all requests.

    Every row of ``requests`` is passed to ``requests.html``. Rows whose
    second column is 'ASSESSMENT' get one extra trailing element: the
    ``file_id`` found in ``requests_assessment`` for that uuid, or None when
    no such row exists.
    """
    cur = get_db().cursor()
    try:
        rows = cur.execute('SELECT * FROM requests').fetchall()
        request_list = []
        for element in rows:
            if element[1] == 'ASSESSMENT':
                match = cur.execute(
                    'SELECT file_id FROM requests_assessment WHERE uuid = ?',
                    (element[2], )).fetchone()
                # An assessment without a linked config file must not break
                # the whole listing with an IndexError.
                # NOTE(review): confirm the template copes with a None
                # file_id.
                file_id = match[0] if match else None
                element = element + (file_id, )
            request_list.append(element)
    finally:
        cur.close()

    headers = {'Content-Type': 'text/html'}
    return make_response(
        render_template('requests.html', request_list=request_list), 200,
        headers)
def get(self, uuid):
    """Render the log of the request ``uuid``.

    Returns a 404 error response when ``requests`` has no row for the uuid
    or when ``cat_file`` yields nothing for ``<HDFSLOGDIR>/<uuid>.txt``.
    Otherwise ``error_log.html`` is rendered with the log content and the
    request's ``type`` and ``error`` columns.
    """
    cursor = get_db().cursor()
    rows = cursor.execute('SELECT error, type FROM requests WHERE uuid = ?',
                          (uuid, )).fetchall()
    if not rows:
        return gen_error_response(404, 'No request with that uuid')

    log_data = cat_file('%s/%s.txt' % (HDFSLOGDIR, uuid))
    if not log_data:
        return gen_error_response(404, 'No log file for that request')

    first = rows[0]
    error_log = {
        'id': uuid,
        'data': log_data,
        'type': first[1],
        'error': first[0],
    }
    page = render_template('error_log.html', error_log=error_log)
    return make_response(page, 200, {'Content-Type': 'text/html'})
def post(self):
    """Handle a POST that submits a profiling job.

    The job is launched by ``submit_profiling``; on success the request
    arguments are stored in ``requests_profiling`` as a JSON string.

    Returns:
        A JSON ``Response`` holding the job URI on success, otherwise the
        result of ``gen_error_response``: 400 for a missing/empty JSON body,
        401 when the ``Authorization`` header is absent or the token is
        rejected, 403 when the user may not use the resource, 500 when the
        job could not be submitted.
    """
    request_json = request.get_json()  # JSON data of the POST request
    if not request_json:
        return gen_error_response(400, "Malformed request")

    # Use .get(): indexing the headers raises when the header is missing, so
    # the 401 answer below would never be produced for that case.
    token = request.headers.get('Authorization')
    if not token:
        return gen_error_response(
            401,
            "Authenticate at https://eubrabigsea.dei.uc.pt/engine/api/checkin_data",
            "DQProfiling")

    username, description = verify_token(token)
    if not username:
        # The user is not logged correctly in the system
        return gen_error_response(401, description, "DQProfiling")

    is_authorized, description = verify_resource(
        username, token, "DQProfiling", "DataQuality")
    if not is_authorized:
        # The user is not authorized to use the resource
        return gen_error_response(403, description, "DQProfiling")

    pid = submit_profiling(request, username)
    if not pid:
        # submit_profiling returns None on failure: do not store a row with
        # an empty uuid, and answer explicitly instead of returning None.
        return gen_error_response(500, "The profiling could not be submitted")

    cur = get_db().cursor()
    try:
        cur.execute('INSERT INTO requests_profiling VALUES(?,?);',
                    (pid, json.dumps(request_json)))
    finally:
        cur.close()

    body = {
        "response": "Job sucsesfully submitted",
        "job_uri": url_for('profiling', uuid=pid),
    }
    return Response(response=json.dumps(body), mimetype="application/json")
def get(self, uuid=None):
    """Render the detail page of the assessment request ``uuid``.

    Returns a 400 error response when no uuid is given and a 404 ('Wrong
    uuid') when the uuid has no row in ``requests`` or in
    ``requests_assessment``. Otherwise ``assessment_detail.html`` is rendered
    with the request columns and the linked ``file_id``.
    """
    if not uuid:
        return gen_error_response(400, "You should provide a valid uuid")

    cur = get_db().cursor()
    try:
        rv = cur.execute('SELECT * FROM requests WHERE uuid = ?',
                         (uuid, )).fetchall()
        if not rv:
            return gen_error_response(404, 'Wrong uuid')

        file_row = cur.execute(
            'SELECT file_id FROM requests_assessment WHERE uuid = ?',
            (uuid, )).fetchone()
    finally:
        cur.close()

    if file_row is None:
        # The uuid exists in requests but has no assessment row; answer 404
        # instead of failing with an IndexError.
        return gen_error_response(404, 'Wrong uuid')

    row = rv[0]
    assessment_data = {
        'user_id': row[0],
        'type': row[1],
        'uuid': row[2],
        'done': row[3],
        'submit_timestamp': row[4],
        'completed_timestamp': row[5],
        'error': row[6],
        'file_id': file_row[0],
    }
    headers = {'Content-Type': 'text/html'}
    return make_response(
        render_template('assessment_detail.html',
                        assessment_data=assessment_data), 200, headers)