def authenticate_user():
    """Authenticate a user from the JSON request body and issue an access token.

    Reads ``username`` and ``password`` from the request JSON. Responds with
    ``resp.SERVER_ERROR_404`` when no user is found, ``resp.UNAUTHORIZED_401``
    when the password hash does not verify, and ``resp.SUCCESS_201`` with the
    token and profile fields on success. Any exception raised along the way is
    printed and answered with ``resp.INVALID_INPUT_422``.
    """
    try:
        payload = request.get_json()
        user = User.find_by_username(payload['username'])
        if not user:
            return response_with(resp.SERVER_ERROR_404)

        if not User.verify_hash(payload['password'], user.password):
            return response_with(resp.UNAUTHORIZED_401)

        # The token identity is the username exactly as submitted.
        token = create_access_token(identity=payload['username'])
        body = {
            'message': 'Logged in as {}'.format(user.username),
            "token": token,
            "name": user.username,
            "avatar": 'https://wpimg.wallstcn.com/f778738c-e4f8-4870-b634-56703b4acafe.gif'
        }
        return response_with(resp.SUCCESS_201, value=body)
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def get_all_metrics_by_task(task):
    """Return the metrics obtained from ``get_metrics_by_task`` for ``task``.

    Responds with ``resp.SUCCESS_200`` when the result is non-empty and with
    ``resp.SERVER_ERROR_404`` otherwise; the result is included in both cases.
    """
    metrics = get_metrics_by_task(task)
    if len(metrics) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": metrics})
    return response_with(resp.SUCCESS_200, value={"result": metrics})
def authenticate_dev_user():
    """Authenticate a user by e-mail or username and issue a non-expiring token.

    The user is looked up by ``email`` when present in the request JSON,
    otherwise by ``username``. Responses:

    * ``resp.SERVER_ERROR_404`` - no user found (or neither field supplied).
    * ``resp.BAD_REQUEST_400`` - the user is not verified.
    * ``resp.UNAUTHORIZED_401`` - the password hash does not verify.
    * ``resp.SUCCESS_200`` - success, with the access token.
    * ``resp.INVALID_INPUT_422`` - any exception (it is printed first).
    """
    try:
        payload = request.get_json()

        user = {}
        if payload.get('email'):
            user = User.find_by_email(payload['email'])
        elif payload.get('username'):
            user = User.find_by_username(payload['username'])

        if not user:
            return response_with(resp.SERVER_ERROR_404)
        if not user['is_verified']:
            return response_with(resp.BAD_REQUEST_400)
        if not User.verify_hash(payload['password'], user['password']):
            return response_with(resp.UNAUTHORIZED_401)

        # JWT_ACCESS_TOKEN_EXPIRES: in development the token does not expire.
        token = create_access_token(identity=user['username'],
                                    expires_delta=False)
        body = {
            'message': 'Logged in as admin',
            "access_token": token
        }
        return response_with(resp.SUCCESS_200, value=body)
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def get_experiment_metrics(experiment_id):
    """Fetch the evaluation metrics stored for one experiment.

    Queries the experiments collection for documents whose ``task_id`` equals
    ``str(experiment_id)`` and that contain ``task_action.evaluation.metrics``.
    Responds with ``resp.SUCCESS_200`` when at least one document matches and
    with ``resp.SERVER_ERROR_404`` otherwise.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    # Database and collection names come from the environment, falling back
    # to the development configuration.
    client = get_data_client()
    database = client[os.environ.get('TASKS_DATABASE_NAME',
                                     DevelopmentConfig.TASKS_DATABASE_NAME)]
    collection = database[os.environ.get(
        'EXPERIMENTS_COLLECTION_NAME',
        DevelopmentConfig.EXPERIMENTS_COLLECTION_NAME)]

    criteria = {
        'task_id': str(experiment_id),
        'task_action.evaluation.metrics': {'$exists': True}
    }
    fields = {'_id': 0, 'metrics': '$task_action.evaluation.metrics'}
    result = list(collection.find(criteria, fields))
    print(len(result))

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    if len(result) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": result})
    return response_with(resp.SUCCESS_200, value={"result": result})
def get_best_model_by_task_and_corpus(task, corpus):
    """Return the ``task_id`` of the best model for a task/corpus pair.

    The optimisation objective is read from the ``objective`` query-string
    argument as a comma-separated list and defaults to ``['accuracy']``.

    Responds with ``resp.SUCCESS_200`` and ``[{'task_id': ...}]`` on success,
    and with ``resp.SERVER_ERROR_404`` and an empty result when the tuner
    raises ``ValueError``/``AssertionError`` or returns no models.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    parameter_space = {'dataset': {'task': task, 'corpus': corpus}}

    raw_objective = None
    if request.args is not None:
        raw_objective = request.args.get('objective')
    objective = (raw_objective.split(',')
                 if raw_objective is not None else ['accuracy'])

    tuner = GridSearch(objective=objective, parameter_space=parameter_space)
    try:
        models = tuner.get_best_models(num_models=1)
    except (ValueError, AssertionError):
        return response_with(resp.SERVER_ERROR_404, value={"result": []})

    # An empty result used to raise IndexError on models[0]; answer 404 like
    # the tuner-failure path instead.
    if models is None or len(models) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": []})

    result = [{'task_id': models[0]['task_id']}]

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    return response_with(resp.SUCCESS_200, value={"result": result})
def get_all_experiments():
    """List every document of the experiments collection in projected form.

    Each returned item exposes ``task``, ``corpus``, ``description`` and
    ``task_id``. Responds with ``resp.SUCCESS_200`` when the collection yields
    at least one document and with ``resp.SERVER_ERROR_404`` otherwise.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    # Database and collection names come from the environment, falling back
    # to the development configuration.
    client = get_data_client()
    database = client[os.environ.get('TASKS_DATABASE_NAME',
                                     DevelopmentConfig.TASKS_DATABASE_NAME)]
    collection = database[os.environ.get(
        'EXPERIMENTS_COLLECTION_NAME',
        DevelopmentConfig.EXPERIMENTS_COLLECTION_NAME)]

    fields = {
        '_id': 0,
        'task': '$task_action.kwargs.dataset.task',
        'corpus': '$task_action.kwargs.dataset.corpus',
        'description': '$task_name',
        'task_id': 1
    }
    result = list(collection.find({}, fields))

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    if len(result) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": result})
    return response_with(resp.SUCCESS_200, value={"result": result})
def get_aggregated_metrics_by_task_and_corpus(task, corpus):
    """Return aggregated metrics for a task/corpus pair as a headers/table pair.

    The optimisation objective is read from the ``objective`` query-string
    argument as a comma-separated list and defaults to ``['accuracy']``.

    On success responds with ``resp.SUCCESS_200`` and
    ``{'headers': [...], 'table': [[key, value, ...], ...]}``, where each row
    starts with the metrics key followed by the values in header order.
    Responds with ``resp.SERVER_ERROR_404`` and an empty result when the tuner
    raises ``ValueError``/``AssertionError`` or returns no metrics.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    parameter_space = {'dataset': {'task': task, 'corpus': corpus}}

    raw_objective = None
    if request.args is not None:
        raw_objective = request.args.get('objective')
    objective = (raw_objective.split(',')
                 if raw_objective is not None else ['accuracy'])

    tuner = GridSearch(objective=objective, parameter_space=parameter_space)
    try:
        metrics = tuner.aggregate_metrics()
    except (ValueError, AssertionError):
        return response_with(resp.SERVER_ERROR_404, value={"result": []})

    # Empty metrics used to raise IndexError while deriving the headers;
    # answer 404 like the tuner-failure path instead.
    if not metrics:
        return response_with(resp.SERVER_ERROR_404, value={"result": []})

    # Drop the columns that must not appear in the table (mutates the
    # per-key dicts in place, as before).
    excluded_columns = ['task', 'corpus', 'balance_data', 'architecture']
    for values in metrics.values():
        for column in excluded_columns:
            values.pop(column, None)

    # Headers are taken from the first entry; every row is read with them.
    headers = list(next(iter(metrics.values())).keys())
    table = [[idx] + [values[h] for h in headers]
             for idx, values in metrics.items()]
    result = {'headers': headers, 'table': table}

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    return response_with(resp.SUCCESS_200, value={"result": result})
def get_features_names(task):
    """Return the names obtained from ``get_task_request_params_names`` for ``task``.

    Responds with ``resp.SUCCESS_200`` when the result is non-empty and with
    ``resp.SERVER_ERROR_404`` otherwise; the result is included in both cases.
    """
    names = get_task_request_params_names(task)
    if len(names) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": names})
    return response_with(resp.SUCCESS_200, value={"result": names})
def authenticate_user():
    """Authenticate a user by e-mail or username and issue an expiring token.

    The user is looked up by ``email`` when present in the request JSON,
    otherwise by ``username``. Responses:

    * ``resp.SERVER_ERROR_404`` - no user found (or neither field supplied).
    * ``resp.BAD_REQUEST_400`` - the user is not verified.
    * ``resp.UNAUTHORIZED_401`` - the password hash does not verify.
    * ``resp.SUCCESS_200`` - success, with the access token.
    * ``resp.INVALID_INPUT_422`` - any exception (it is printed first).
    """
    try:
        payload = request.get_json()

        user = {}
        if payload.get('email'):
            user = User.find_by_email(payload['email'])
        elif payload.get('username'):
            user = User.find_by_username(payload['username'])

        if not user:
            return response_with(resp.SERVER_ERROR_404)
        if not user['is_verified']:
            return response_with(resp.BAD_REQUEST_400)
        if not User.verify_hash(payload['password'], user['password']):
            return response_with(resp.UNAUTHORIZED_401)

        # Token lifetime in minutes: JWT_ACCESS_TOKEN_EXPIRES from the
        # environment, else the development configuration value (the
        # original note states 15 minutes by default).
        lifetime_minutes = int(os.environ.get(
            'JWT_ACCESS_TOKEN_EXPIRES',
            DevelopmentConfig.JWT_ACCESS_TOKEN_EXPIRES))
        lifetime = datetime.timedelta(minutes=lifetime_minutes)
        token = create_access_token(identity=user['username'],
                                    expires_delta=lifetime)
        body = {
            'message': 'Logged in as admin',
            "access_token": token
        }
        return response_with(resp.SUCCESS_200, value=body)
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def get_distinct_experiments_by_task():
    """List the distinct task values found in the experiments collection.

    Each item pairs the raw ``task`` value with the text returned by
    ``get_task_type_text(task, 'es')``. Responds with ``resp.SUCCESS_200``
    when at least one task exists and with ``resp.SERVER_ERROR_404`` otherwise.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    # Database and collection names come from the environment, falling back
    # to the development configuration.
    client = get_data_client()
    database = client[os.environ.get('TASKS_DATABASE_NAME',
                                     DevelopmentConfig.TASKS_DATABASE_NAME)]
    collection = database[os.environ.get(
        'EXPERIMENTS_COLLECTION_NAME',
        DevelopmentConfig.EXPERIMENTS_COLLECTION_NAME)]

    tasks = collection.distinct('task_action.kwargs.dataset.task')
    result = [
        {'task_name': get_task_type_text(task, 'es'), 'task': task}
        for task in tasks
    ]

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    if len(result) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": result})
    return response_with(resp.SUCCESS_200, value={"result": result})
def create_book():
    """Create a book from the JSON request body.

    The payload is loaded through ``BookSchema``, the loaded object's
    ``create()`` is called, and its dumped form is returned with
    ``resp.SUCCESS_201``. Any exception is printed and answered with
    ``resp.INVALID_INPUT_422``.
    """
    try:
        schema = BookSchema()
        new_book = schema.load(request.get_json())
        serialized = schema.dump(new_book.create())
        return response_with(resp.SUCCESS_201, value={"book": serialized})
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def create_author():
    """Create an author from the JSON request body.

    The payload is printed, loaded through ``AuthorSchema``, the loaded
    object's ``create()`` is called, and its dumped form is returned with
    ``resp.SUCCESS_201``. Any exception is answered with
    ``resp.INVALID_INPUT_422``.
    """
    try:
        payload = request.get_json()
        print(payload)
        schema = AuthorSchema()
        new_author = schema.load(payload)
        serialized = schema.dump(new_author.create())
        return response_with(resp.SUCCESS_201, value={"author": serialized})
    except Exception as e:
        return response_with(resp.INVALID_INPUT_422)
def verify_email(token):
    """Mark the user identified by a verification token as verified.

    Responses:

    * ``resp.UNAUTHORIZED_401`` - ``confirm_verification_token`` raised.
    * ``resp.SERVER_ERROR_404`` - no user exists for the decoded e-mail.
    * ``resp.INVALID_INPUT_422`` - the user is already verified.
    * ``resp.SUCCESS_200`` - the ``is_verified`` field was updated.
    """
    try:
        email = confirm_verification_token(token)
    except Exception:
        return response_with(resp.UNAUTHORIZED_401)

    user = User.find_by_email(email=email)
    # The lookup can come back empty; without this guard the subscript below
    # raises instead of producing a proper response.
    if not user:
        return response_with(resp.SERVER_ERROR_404)

    if user['is_verified']:
        return response_with(resp.INVALID_INPUT_422)

    user['is_verified'] = True
    User.update_field(user, 'is_verified', True)
    return response_with(
        resp.SUCCESS_200,
        value={
            'message': 'E-mail verified, you can proceed to login now.'
        })
def create_user():
    """
    User registration endpoint
    ---
    parameters:
      - in: body
        name: body
        schema:
          required:
            - username
            - password
          properties:
            username:
              type: string
              description: Username
              default: ""
            password:
              type: string
              description: User password
              default: ""
    responses:
      201:
        description: Registration succeeded
        schema:
          properties:
            code:
              type: string
      422:
        description: Registration failed
        schema:
          properties:
            code:
              type: string
            message:
              type: string
    """
    try:
        payload = request.get_json()
        # Replace the submitted password with its hash before loading.
        payload['password'] = User.generate_hash(payload['password'])
        schema = UserSchema()
        new_user = schema.load(payload)
        # The dumped value is not returned; the call is kept so create() and
        # dump() still run exactly as before.
        schema.dump(new_user.create())
        return response_with(resp.SUCCESS_201)
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def update_author_detail(id):
    """Overwrite an author's first and last name from the JSON request body.

    The author is fetched with ``get_or_404(id)``, both name fields are
    replaced, the change is committed, and the dumped author is returned with
    ``resp.SUCCESS_200``.
    """
    payload = request.get_json()
    author = Author.query.get_or_404(id)

    author.first_name = payload['first_name']
    author.last_name = payload['last_name']

    db.session.add(author)
    db.session.commit()

    serialized = AuthorSchema().dump(author)
    return response_with(resp.SUCCESS_200, value={"author": serialized})
def update_book_detail(id):
    """Overwrite a book's title and year from the JSON request body.

    The book is fetched with ``get_or_404(id)``, both fields are replaced,
    the change is committed, and the dumped book is returned with
    ``resp.SUCCESS_200``.
    """
    data = request.get_json()
    # The lookup must receive the primary key; it was previously called
    # without any argument.
    get_book = Book.query.get_or_404(id)
    get_book.title = data['title']
    get_book.year = data['year']
    db.session.add(get_book)
    db.session.commit()
    book_schema = BookSchema()
    book = book_schema.dump(get_book)
    return response_with(resp.SUCCESS_200, value={"book": book})
def create_user():
    """Register a user and build the e-mail verification link.

    Responds with ``resp.INVALID_INPUT_422`` when the e-mail or the username
    is already taken, or when any exception is raised (it is printed first).
    On success responds with ``resp.SUCCESS_201`` carrying the stringified
    result of ``User.create`` and the verification URL.
    """
    try:
        payload = request.get_json()

        # The username lookup only runs when the e-mail is still free.
        email_taken = User.find_by_email(payload['email']) is not None
        if email_taken or User.find_by_username(
                payload['username']) is not None:
            return response_with(resp.INVALID_INPUT_422)

        # Replace the submitted password with its hash before storing.
        payload['password'] = User.generate_hash(payload['password'])

        token = generate_verification_token(payload['email'])
        verification_email = url_for('user_routes.verify_email',
                                     token=token,
                                     _external=True)
        inserted = str(User.create(payload))
        outcome = {
            'db_insert': inserted,
            'verification_email': verification_email
        }
        return response_with(resp.SUCCESS_201, value={'result': outcome})
    except Exception as e:
        print(e)
        return response_with(resp.INVALID_INPUT_422)
def modify_book_detail(id):
    """Partially update a book from the JSON request body.

    Only ``title`` and ``year`` values that are present and truthy in the
    payload are applied. The book is fetched with ``get_or_404(id)``, the
    change is committed, and the dumped book is returned with
    ``resp.SUCCESS_200``.
    """
    # The payload comes from the request body; ``get_or_404`` is a query
    # method, not a request method.
    data = request.get_json()
    get_book = Book.query.get_or_404(id)
    if data.get('title'):
        get_book.title = data['title']
    if data.get('year'):
        # Assign to the model attribute; the previous code bound a throwaway
        # local, so the year was never updated.
        get_book.year = data['year']
    db.session.add(get_book)
    db.session.commit()
    # Build the schema locally, as the other book handlers do.
    book_schema = BookSchema()
    book = book_schema.dump(get_book)
    return response_with(resp.SUCCESS_200, value={"book": book})
def get_features_values(feature, corpus):
    """Return the sorted values of ``feature`` stored for ``corpus``.

    Reads every document of the ``"<feature>_codes"`` collection in the
    database named ``corpus`` and collects each document's ``feature`` field.
    Responds with ``resp.SUCCESS_200`` when at least one value exists and with
    ``resp.SERVER_ERROR_404`` otherwise.
    """
    # Start of the timed section; the elapsed time is logged below.
    started_at = time.time()

    client = get_data_client()
    collection = client[corpus][f"{feature}_codes"]

    cursor = collection.find({}, {'_id': 0, feature: 1})
    result = sorted([doc[feature] for doc in cursor])

    elapsed = time.time() - started_at
    log.info(f"Total execution time = {elapsed} seconds")

    if len(result) == 0:
        return response_with(resp.SERVER_ERROR_404, value={"result": result})
    return response_with(resp.SUCCESS_200, value={"result": result})
def delete_author(id):
    """Delete the author fetched with ``get_or_404(id)`` and commit.

    Responds with ``resp.SUCCESS_200``.
    """
    author = Author.query.get_or_404(id)
    session = db.session
    session.delete(author)
    session.commit()
    return response_with(resp.SUCCESS_200)
def get_author_detail(author_id):
    """Return the author fetched with ``get_or_404(author_id)``.

    The author is dumped through ``AuthorSchema`` and returned with
    ``resp.SUCCESS_200``.
    """
    record = Author.query.get_or_404(author_id)
    serialized = AuthorSchema().dump(record)
    return response_with(resp.SUCCESS_200, value={"author": serialized})
def get_author_list():
    """Return all authors, restricted to first name, last name and id.

    Responds with ``resp.SUCCESS_200``.
    """
    records = Author.query.all()
    visible_fields = ['first_name', 'last_name', 'id']
    schema = AuthorSchema(many=True, only=visible_fields)
    return response_with(resp.SUCCESS_200,
                         value={"authors": schema.dump(records)})
def get_book_list():
    """Return all books, restricted to author id, title and year.

    Responds with ``resp.SUCCESS_200``.
    """
    records = Book.query.all()
    visible_fields = ['author_id', 'title', 'year']
    schema = BookSchema(many=True, only=visible_fields)
    return response_with(resp.SUCCESS_200,
                         value={"books": schema.dump(records)})
def get_book_detail(id):
    """Return the book fetched with ``get_or_404(id)``.

    The book is dumped through ``BookSchema`` and returned under the
    ``"books"`` key with ``resp.SUCCESS_200``.
    """
    record = Book.query.get_or_404(id)
    serialized = BookSchema().dump(record)
    return response_with(resp.SUCCESS_200, value={"books": serialized})
def delete_books(id):
    """Delete the book fetched with ``get_or_404(id)`` and commit.

    Responds with ``resp.SUCCESS_204``.
    """
    book = Book.query.get_or_404(id)
    session = db.session
    session.delete(book)
    session.commit()
    return response_with(resp.SUCCESS_204)
def predict(experiment_id):
    """Run the model saved for an experiment on the issue in the request JSON.

    The experiment document supplies the task ``kwargs`` and the path to the
    saved model metadata. The request JSON must contain ``description`` and
    may contain the task's feature fields, ``num_issues_to_compare`` and
    ``max_num_predictions``.

    Responses:

    * ``resp.SERVER_ERROR_404`` - no matching experiment, or the model
      metadata / model files are missing (``FileNotFoundError``).
    * ``resp.INVALID_INPUT_422`` - no trees could be built from the
      description.
    * ``resp.SUCCESS_200`` - ``[{'pred': ...}]`` for single-issue tasks, or up
      to ``max_num_predictions`` ``{'bug_id', 'predidct_proba'}`` entries for
      the ``duplicity``/``similarity`` tasks.

    Raises:
        AssertionError: when the word-embeddings file does not exist.
    """
    # Stores the execution start time to calculate the time it takes for the module to execute.
    initial_time = time.time()

    # Load saved task.
    mongodb_client: MongoClient = get_data_client()
    db = mongodb_client[os.environ.get('TASKS_DATABASE_NAME', DevelopmentConfig.TASKS_DATABASE_NAME)]
    col = db[os.environ.get('EXPERIMENTS_COLLECTION_NAME', DevelopmentConfig.EXPERIMENTS_COLLECTION_NAME)]
    query = {
        'task_id': str(experiment_id),
        'task_action.kwargs': {
            '$exists': True
        },
        'task_action.train': {
            '$exists': True
        }
    }
    projection = {
        '_id': 0,
        'kwargs': '$task_action.kwargs',
        'model_meta_file': '$task_action.train.model_meta_file'
    }
    data = col.find_one(query, projection)
    # find_one yields None when nothing matches; answer 404 instead of
    # failing on the subscripts below.
    if data is None:
        return response_with(resp.SERVER_ERROR_404, value={"result": []})
    kwargs = data['kwargs']
    model_meta_file = data['model_meta_file']

    saved_params = {}
    if model_meta_file is not None:
        # Change path to Docker container.
        if os.name == 'posix':
            model_meta_file = model_meta_file.replace(
                '/datadrive/host-mounted-volumes/syn/', '/usr/src/')
        log.info(
            f"Loading hyperparameters from: '{str(Path(model_meta_file))}'.")
        try:
            # NOTE(review): allow_pickle=True unpickles the file contents;
            # confirm this path can only point at trusted files.
            saved_params = np.load(str(Path(model_meta_file)),
                                   allow_pickle=True).item()
        except FileNotFoundError:
            return response_with(resp.SERVER_ERROR_404, value={"result": []})

    # Get collapsed unary binary trees
    collapsed_unary_binary_trees = get_collapsed_unary_binary_trees(
        normalize_incidence(str(request.json['description']),
                            to_lower_case=True))
    if len(collapsed_unary_binary_trees) == 0:
        return response_with(resp.INVALID_INPUT_422, value={"result": []})

    # Get attention vectors
    attention_vectors = get_attention_vectors(collapsed_unary_binary_trees)

    # Data: a one-row frame filled from the request fields the task accepts.
    data_columns = get_task_features_column_names(kwargs['dataset']['task'])
    dataset = pd.DataFrame(columns=data_columns)
    request_args = [i for i in request.json.keys()]
    for column in get_task_request_params_names(kwargs['dataset']['task']):
        if column in request_args:
            dataset.at[0, column] = request.json[column]
    dataset.at[0, 'trees'] = collapsed_unary_binary_trees
    dataset.at[0, 'attention_vectors'] = attention_vectors

    # if task is 'duplicity' or 'similarity' need do more actions.
    query_limit = 1000
    if 'num_issues_to_compare' in request_args:
        query_limit = request.json['num_issues_to_compare']
    if kwargs['dataset']['task'] in ['duplicity', 'similarity']:
        dataset = get_pairs_dataset(dataset, kwargs['dataset']['task'],
                                    kwargs['dataset']['corpus'],
                                    query_limit).copy()

    # Encode structured info.
    dataset = encode_dataset_structured_data(dataset,
                                             kwargs['dataset']['corpus'],
                                             kwargs['dataset']['task'])

    # Format dataset
    inst = format_dataset(dataset, kwargs['dataset']['task'],
                          kwargs['dataset']['corpus'])

    # Word embeddings.
    embeddings_dir = Path(
        os.environ.get('DATA_PATH',
                       DevelopmentConfig.DATA_PATH)) / 'word_embeddings'
    if saved_params['embeddings_pretrained'] or 'glove' == saved_params[
            'embeddings_model']:
        embeddings_filename = \
            get_filtered_word_embeddings_filename(
                model=saved_params['embeddings_model'],
                size=saved_params['embeddings_size']
            )
    else:
        embeddings_filename = f"{saved_params['embeddings_model']}-{kwargs['dataset']['corpus']}-" \
                              f"{saved_params['embeddings_size']}.txt"
    embeddings_path = Path(embeddings_dir) / saved_params[
        'embeddings_model'] / embeddings_filename

    # Change path to Docker container.
    if os.name == 'posix':
        embeddings_path = str(embeddings_path).replace(
            '/datadrive/host-mounted-volumes/syn/data/', '/usr/src/')
    if not os.path.isfile(embeddings_path):
        log.error(
            f"No such filtered word embeddings file: '{embeddings_path}'.")
    assert os.path.isfile(
        embeddings_path), 'Ensure word embeddings file exists.'
    log.info(f"Reading embeddings from '{embeddings_path}' ...")
    word_embed, w2i = get_embeddings(embeddings_path,
                                     saved_params['embeddings_size'])

    # Load model
    model_builder = get_dynet_model(kwargs['dataset']['task'])
    try:
        model = model_builder(n_classes=saved_params['n_classes'],
                              w2i=w2i,
                              word_embed=word_embed,
                              params=saved_params,
                              model_meta_file=model_meta_file)
    except FileNotFoundError:
        return response_with(resp.SERVER_ERROR_404, value={"result": []})

    # build graph for this instance
    dy.renew_cg()
    result = []
    df_result = pd.DataFrame(columns=['bug_id', 'predidct_proba'])
    if kwargs['dataset']['task'] not in ['duplicity', 'similarity']:
        # Check data integrity.
        check_data_integrity(inst[0], inst[1])
        # Issue description as Tuple(trees, attention_vectors).
        issue_description = (inst[0], inst[1])
        # Issue structured data.
        issue_structured_data = inst[2]
        pred, predict_proba, _ = model.predict(issue_description,
                                               issue_structured_data)
        label = get_label_column_name(kwargs['dataset']['task'],
                                      kwargs['dataset']['corpus'])
        codes = get_task_structured_data_codes(kwargs['dataset']['corpus'],
                                               label)
        # Reverse lookup: report the key of ``codes`` whose value is ``pred``.
        result.append(
            {'pred': list(codes.keys())[list(codes.values()).index(pred)]})
    else:
        for i, pair in enumerate(tqdm(inst, total=len(inst), desc='rows')):
            # Tuple(inst[0], inst[1], inst[2], inst[3], inst[4], inst[5], inst[6]) =
            # Tuple(trees_left, trees_right, attention_vectors_left, attention_vectors_right, structured_data_left,
            # structured_data_right, label).
            # build graph for this instance
            dy.renew_cg()
            # Check data integrity.
            check_data_integrity(pair[0], pair[2])
            check_data_integrity(pair[1], pair[3])
            # Issue description as Tuple(trees, attention_vectors).
            issue_description_left = (pair[0], pair[2])
            issue_description_right = (pair[1], pair[3])
            # Issue structured data.
            issue_structured_data_left = pair[4]
            issue_structured_data_right = pair[5]
            pred, predict_proba, _, _ = model.predict(
                issue_description_left, issue_description_right,
                issue_structured_data_left, issue_structured_data_right)
            # Only pairs predicted as class 1 are kept as candidates.
            if pred == 1:
                df_result.at[i, 'bug_id'] = pair[6]
                df_result.at[i, 'predidct_proba'] = predict_proba[pred]

        max_num_predictions = 5
        if 'max_num_predictions' in request_args:
            max_num_predictions = request.json['max_num_predictions']
        # Keep the candidates with the highest probability.
        sorted_df_result = df_result.sort_values('predidct_proba',
                                                 ascending=False).copy()
        limited_sorted_df_result = sorted_df_result.head(
            max_num_predictions).copy()
        for index, row in limited_sorted_df_result.iterrows():
            result.append({
                'bug_id': int(row['bug_id']),
                'predidct_proba': float(row['predidct_proba'])
            })

    log.info(f"result = {result}")
    final_time = time.time()
    log.info(f"Total execution time = {final_time - initial_time} seconds")
    return response_with(resp.SUCCESS_200, value={"result": result})
def bad_request(e):
    """Log the error and respond with ``resp.BAD_REQUEST_400``."""
    logging.error(e)
    reply = response_with(resp.BAD_REQUEST_400)
    return reply
def server_error(e):
    """Log the error and respond with ``resp.SERVER_ERROR_500``."""
    logging.error(e)
    reply = response_with(resp.SERVER_ERROR_500)
    return reply
def not_found(e):
    """Log the error and respond with ``resp.SERVER_ERROR_404``."""
    logging.error(e)
    reply = response_with(resp.SERVER_ERROR_404)
    return reply