Beispiel #1
0
    def post(self):
        """Handle a POST request that submits a new data quality assessment.

        The request must carry a JSON body and an ``Authorization`` header.
        The token is checked with ``verify_token`` and ``verify_resource``,
        the body with ``verify_config_json``; the configuration file is then
        produced by ``make_config_file`` and the job is started through
        ``submit_assessment``.

        Returns:
            A JSON ``Response`` with the job URI and the configuration file
            URI on success. Otherwise the value of ``gen_error_response``:
            400 for a missing or rejected body, 401 for a missing or rejected
            token, 403 when the user is not authorized for the resource and
            500 when ``submit_assessment`` did not return a job id.
        """
        request_json = request.get_json() or None
        if not request_json:
            return gen_error_response(400, "Malformed request")

        # Use .get(): indexing the headers raises when the header is absent,
        # which would bypass the 401 answer below.
        token = request.headers.get('Authorization')
        if not token:
            return gen_error_response(
                401,
                "Authenticate at https://eubrabigsea.dei.uc.pt/engine/api/checkin_data",
                "DQAssessment")

        username, description = verify_token(token)
        if not username:
            # The user is not logged correctly in the system
            return gen_error_response(401, description, "DQAssessment")

        isAutorized, description = verify_resource(
            username, token, "DQAssessment",
            "DataQuality")  # TODO: change to spark_assessment
        if not isAutorized:
            # The user is not authorized to use the resource
            return gen_error_response(403, description, "DQAssessment")

        isOk, description = verify_config_json(request_json)
        if not isOk:
            return gen_error_response(400, description)

        file_id, new = make_config_file(request)
        if new:
            # Record the configuration file only when it was newly created
            cur = get_db().cursor()
            cur.execute(
                'INSERT INTO assessment_file VALUES(?,?,CURRENT_TIMESTAMP);',
                (username, file_id))
            cur.close()

        pid = submit_assessment(file_id, username)
        if not pid:
            # Without this branch the view would return None to the framework
            return gen_error_response(
                500, "Unable to submit the assessment job")

        cur = get_db().cursor()
        cur.execute('INSERT INTO requests_assessment VALUES(?,?);',
                    (pid, file_id))
        cur.close()

        body = {
            "response": "Job sucsesfully submitted",
            "job_uri": url_for('assessment', uuid=pid),
            "config_file_uri": url_for('config', file_id=file_id),
        }
        return Response(response=json.dumps(body),
                        mimetype="application/json")
Beispiel #2
0
 def get(self, file_id):
     """Return the configuration file identified by ``file_id``.

     The file content is read with ``cat_file`` from ``HDFSCONFIGDIR``.
     When the request ``Content-Type`` header is exactly ``text/plain`` the
     raw content is returned; otherwise the ``config_file.html`` template is
     rendered with the content, the id and the uuids of the requests in
     ``requests_assessment`` that reference this file.

     Any exception raised along the way is printed and answered with a 404
     error response.
     """
     file_path = "{}/{}.txt".format(HDFSCONFIGDIR, file_id)
     try:
         assessment_file = cat_file(file_path)
         wants_plain_text = (
             request.headers.get('Content-Type') == 'text/plain')
         if wants_plain_text:
             return assessment_file

         # Collect every assessment request that used this configuration
         cur = get_db().cursor()
         rows = cur.execute(
             'SELECT uuid FROM requests_assessment WHERE file_id = ?',
             (str(file_id), )).fetchall()
         config_file = {
             'data': assessment_file,
             'id': file_id,
             'used_by': [row[0] for row in rows],
         }
         page = render_template('config_file.html', config_file=config_file)
         return make_response(page, 200, {'Content-Type': 'text/html'})
     except Exception as e:
         print(e)
         return gen_error_response(404, "No such file")
Beispiel #3
0
 def get(self, uuid=None):
     """Render the detail page of the profiling request ``uuid``.

     Answers with a 400 error response when no uuid is given and with a 404
     error response when the uuid has no row in ``requests`` or in
     ``requests_profiling``. Otherwise the ``profiling_detail.html``
     template is rendered with the columns of the ``requests`` row and the
     JSON-decoded second column of the ``requests_profiling`` row.
     """
     if not uuid:
         return gen_error_response(400, "You should provide a valid uuid")

     cur = get_db().cursor()
     request_rows = cur.execute('SELECT * FROM requests WHERE uuid = ?',
                                (uuid, )).fetchall()
     if not request_rows:
         return gen_error_response(404, 'Wrong uuid')

     profiling_rows = cur.execute(
         'SELECT * FROM requests_profiling WHERE uuid = ?',
         (uuid, )).fetchall()
     if not profiling_rows:
         return gen_error_response(404, 'Wrong uuid')

     request_row = request_rows[0]
     # The second column of requests_profiling holds the JSON arguments
     stored_args = profiling_rows[0][1]
     profiling_data = {
         'user_id': request_row[0],
         'type': request_row[1],
         'uuid': request_row[2],
         'done': request_row[3],
         'submit_timestamp': request_row[4],
         'completed_timestamp': request_row[5],
         'args_dict': json.loads(stored_args),
         'error': request_row[6],
     }
     page = render_template('profiling_detail.html',
                            profiling_data=profiling_data)
     return make_response(page, 200, {'Content-Type': 'text/html'})
Beispiel #4
0
def submit_assessment(file_id, username):
    """Start the DQAssessment Spark job for a stored configuration file.

    The spark-submit command line is built from the configuration file
    location (``HDFSURL``/``HDFSCONFIGDIR``/``<file_id>.txt``) and handed to
    ``popen_and_call`` together with ``on_assessment_done`` and a freshly
    generated uuid4. A row describing the request is then inserted into the
    ``requests`` table; a failure of that insert is only printed.

    Args:
        file_id: Identifier of the configuration file to assess.
        username: The user that is requesting the assessment.

    Returns:
        The uuid4 string identifying the request, or None when building or
        launching the command raised an exception.
    """
    try:
        spark_submit = '/spark/spark-2.2.0-bin-hadoop2.7/bin/spark-submit --master spark://master:7077 /bigSEA/DQAssessment.py '
        config_location = '{}/{}/{}.txt'.format(HDFSURL, HDFSCONFIGDIR,
                                                file_id)
        args = shlex.split(spark_submit + " " + config_location + " 1")
        print(args)

        pid = str(uuid.uuid4())
        popen_and_call(on_assessment_done, pid, args)

        # Bookkeeping is best effort: the id is returned even if it fails
        try:
            cursor = get_db().cursor()
            cursor.execute(
                'INSERT INTO requests VALUES (?,"ASSESSMENT",?, 0,CURRENT_TIMESTAMP,? , 0);',
                (username, pid, None))
            cursor.close()
        except Exception as db_error:
            print(db_error)
        return pid
    except Exception as launch_error:
        print(launch_error)
        return None
Beispiel #5
0
def init():
    """Run the database schema script and create the working directories.

    The script is read from the application resource ``DATABASE_SCHEMA`` and
    executed on the connection returned by ``get_db``; afterwards
    ``create_log_dir`` and ``create_config_dir`` are called.
    """
    connection = get_db()
    with app.open_resource(DATABASE_SCHEMA, mode='r') as schema_file:
        cursor = connection.cursor()
        cursor.executescript(schema_file.read())

    create_log_dir()
    create_config_dir()
    print("Database initialized and dir created.")
Beispiel #6
0
def submit_profiling(request, username):
    """Start the DQProfiling Spark job described by a request.

    The job arguments are read from the JSON body of ``request`` and passed
    to ``popen_and_call`` together with ``on_profiling_done`` and a freshly
    generated uuid4. A row describing the request is then inserted into the
    ``requests`` table; a failure of that insert is only printed.

    Every value taken from the request becomes exactly one command line
    argument. The values are never re-parsed as a command string, so spaces
    or quotes inside a value cannot add, split or break arguments.

    Args:
        request: The flask request that generated the profiling request.
            Its JSON body must contain ``output``, ``timeliness``,
            ``time_format`` and ``consistency_rules`` plus either ``input``
            or the pattern fields (``pattern``, ``from``, ``to``,
            ``interval``), all of them strings.
        username: The username of the user that is requesting the profiling.

    Returns:
        The unique identifier of the request (uuid4 string), or None if the
        profiling has not been properly submitted.
    """
    try:
        profiler = '/spark/spark-2.2.0-bin-hadoop2.7/bin/spark-submit --master spark://master:7077 /bigSEA/DQProfiling.py '
        # Named "payload" so the json module is not shadowed
        payload = request.get_json()
        interval_string = ""

        if isSetPattern(payload):
            input_file = "--pattern=" + payload["pattern"]
            interval_string = (payload["from"] + ";" + payload["to"] + ";" +
                               payload["interval"])
        elif "input" in payload:
            input_file = payload["input"]
        else:
            # Fail with a clear message instead of an unbound local later on
            raise ValueError(
                'The request must contain a pattern or an "input" field')

        job_args = [
            input_file,
            payload["output"],
            payload["timeliness"],
            payload["time_format"],
            payload["consistency_rules"],
        ]
        if interval_string:
            job_args.append(interval_string)
        # Reject non-string values here so they never reach the launcher
        if not all(isinstance(arg, str) for arg in job_args):
            raise TypeError("Every profiling argument must be a string")

        # Only the fixed launcher prefix is split; user values stay intact
        args = shlex.split(profiler) + job_args
        print(args)

        pid = str(uuid.uuid4())
        popen_and_call(on_profiling_done, pid, args)

        # Bookkeeping is best effort: the id is returned even if it fails
        try:
            cur = get_db().cursor()
            cur.execute(
                'INSERT INTO requests VALUES (?,"PROFILING",?, 0,CURRENT_TIMESTAMP, ?, 0);',
                (username, pid, None))
            cur.close()
        except Exception as e:
            print(e)
        return pid
    except Exception as e:
        print(e)
        return None
Beispiel #7
0
 def get(self):
     """Render the list of all the submitted requests.

     Every row of ``requests`` is passed to the ``requests.html`` template.
     Rows whose second column is ``'ASSESSMENT'`` get one extra trailing
     element: the ``file_id`` stored in ``requests_assessment`` for that
     uuid, or None when no such row exists (so a single incomplete request
     cannot break the whole listing).
     """
     cur = get_db().cursor()
     rv = cur.execute('SELECT * FROM requests').fetchall()
     request_list = []
     for element in rv:
         if element[1] == 'ASSESSMENT':
             assessment_row = cur.execute(
                 'SELECT file_id FROM requests_assessment WHERE uuid = ?',
                 (element[2], )).fetchone()
             # fetchone() gives None when the request has no linked file
             file_id = assessment_row[0] if assessment_row else None
             element = element + (file_id, )
         request_list.append(element)
     headers = {'Content-Type': 'text/html'}
     return make_response(
         render_template('requests.html', request_list=request_list), 200,
         headers)
Beispiel #8
0
 def get(self, uuid):
     """Render the log page of the request ``uuid``.

     Answers with a 404 error response when the uuid has no row in
     ``requests`` or when ``cat_file`` returns nothing for the log file in
     ``HDFSLOGDIR``. Otherwise the ``error_log.html`` template is rendered
     with the uuid, the log content and the ``type`` and ``error`` columns
     of the request.
     """
     cur = get_db().cursor()
     rows = cur.execute('SELECT error, type FROM requests WHERE uuid = ?',
                        (uuid, )).fetchall()
     if not rows:
         return gen_error_response(404, 'No request with that uuid')

     log_path = '{}/{}.txt'.format(HDFSLOGDIR, uuid)
     log_data = cat_file(log_path)
     if not log_data:
         return gen_error_response(404, 'No log file for that request')

     request_row = rows[0]  # columns: error, type
     error_log = {
         'id': uuid,
         'data': log_data,
         'type': request_row[1],
         'error': request_row[0],
     }
     page = render_template('error_log.html', error_log=error_log)
     return make_response(page, 200, {'Content-Type': 'text/html'})
Beispiel #9
0
    def post(self):
        """Handle a POST request that submits a new data quality profiling.

        The request must carry a JSON body and an ``Authorization`` header.
        The token is checked with ``verify_token`` and ``verify_resource``;
        the job is then started through ``submit_profiling`` and, only when a
        job id was obtained, the JSON body is stored in
        ``requests_profiling``.

        Returns:
            A JSON ``Response`` with the job URI on success. Otherwise the
            value of ``gen_error_response``: 400 for a missing body, 401 for
            a missing or rejected token, 403 when the user is not authorized
            for the resource and 500 when ``submit_profiling`` did not
            return a job id.
        """
        request_json = request.get_json()
        if not request_json:
            return gen_error_response(400, "Malformed request")

        # Use .get(): indexing the headers raises when the header is absent,
        # which would bypass the 401 answer below.
        token = request.headers.get('Authorization')
        if not token:
            return gen_error_response(
                401,
                "Authenticate at https://eubrabigsea.dei.uc.pt/engine/api/checkin_data",
                "DQProfiling")

        username, description = verify_token(token)
        if not username:
            # The user is not logged correctly in the system
            return gen_error_response(401, description, "DQProfiling")

        isAutorized, description = verify_resource(
            username, token, "DQProfiling", "DataQuality")
        if not isAutorized:
            # The user is not authorized to use the resource
            return gen_error_response(403, description, "DQProfiling")

        pid = submit_profiling(request, username)
        if not pid:
            # Do not store a row without a job id, and never return None
            return gen_error_response(
                500, "Unable to submit the profiling job")

        cur = get_db().cursor()
        cur.execute('INSERT INTO requests_profiling VALUES(?,?);',
                    (pid, json.dumps(request_json)))
        cur.close()

        body = {
            "response": "Job sucsesfully submitted",
            "job_uri": url_for('profiling', uuid=pid),
        }
        return Response(response=json.dumps(body),
                        mimetype="application/json")
Beispiel #10
0
 def get(self, uuid=None):
     """Render the detail page of the assessment request ``uuid``.

     Answers with a 400 error response when no uuid is given and with a 404
     error response when the uuid has no row in ``requests`` or in
     ``requests_assessment``. Otherwise the ``assessment_detail.html``
     template is rendered with the columns of the ``requests`` row and the
     ``file_id`` linked to the request.
     """
     if not uuid:
         return gen_error_response(400, "You should provide a valid uuid")

     cur = get_db().cursor()
     rv = cur.execute('SELECT * FROM requests WHERE uuid = ?',
                      (uuid, )).fetchall()
     if not rv:
         return gen_error_response(404, 'Wrong uuid')

     assessment_rows = cur.execute(
         'SELECT file_id FROM requests_assessment WHERE uuid = ?',
         (uuid, )).fetchall()
     if not assessment_rows:
         # A uuid without a linked file would otherwise raise IndexError
         return gen_error_response(404, 'Wrong uuid')

     request_row = rv[0]
     assessment_data = {
         'user_id': request_row[0],
         'type': request_row[1],
         'uuid': request_row[2],
         'done': request_row[3],
         'submit_timestamp': request_row[4],
         'completed_timestamp': request_row[5],
         'error': request_row[6],
         'file_id': assessment_rows[0][0],
     }
     headers = {'Content-Type': 'text/html'}
     return make_response(
         render_template('assessment_detail.html',
                         assessment_data=assessment_data), 200, headers)