def handle_prediction_state_by_project(project_id, body):  # noqa: E501
    """Start or abort the prediction process of a project.

    The requested state is taken from the JSON payload of the current
    connexion request (``body`` itself is not read). ``"Start"`` builds a
    ``PredictionWorker`` from the stored predictors and evaluation setup,
    records a ``Waiting`` status and launches ``worker.predict`` in a daemon
    process. ``"Abort"`` terminates the process held in the module-level
    ``prediction_process`` and records a ``Failed`` status. Any other state
    is ignored.

    :param project_id: ID of the current project.
    :type project_id:
    :param body: Request body handed in by the framework; unused here.
    :type body: dict | bytes

    :return: ``'Prediction state altered!'``, or ``('Exception', error)``
        when anything inside the guarded section raises (this includes a
        non-JSON request, because ``prediction_state`` is then unbound).
    """
    # NOTE: only one process handle is kept, so this works for a single
    # request at a time. Parallel use would need task queues or similar.
    global prediction_process

    if connexion.request.is_json:
        payload = connexion.request.get_json()
        prediction_state = PredictionState.from_dict(payload)  # noqa: E501

    # Evaluation-setup entries handed to the worker, in positional order.
    worker_setting_keys = ('with_validation', 'ml_preprocessing',
                           'train_sampling_ratio', 'test_sampling_ratio',
                           'random_seed')

    try:
        db_instance = DatabaseConnector.get_db_instance()
        project = db_instance.get_project_by_id('project_id', project_id)
        predicted_network_id = project['predicted_network_id']

        if prediction_state.state == "Start":
            print("BEGIN PREDICTION PRCOESS")
            predictors = db_instance.get_selected_network_features_by_id(
                'predicted_network_id', predicted_network_id)
            evaluation = db_instance.get_evaluation_result_by_id(
                'project_id', project_id)
            configuration = evaluation['result_data']
            worker = PredictionWorker(
                0, project_id, predictors,
                *(configuration[key] for key in worker_setting_keys))

            prediction_process = multiprocessing.Process(
                target=worker.predict, daemon=True)

            # Announce the run before the child process starts working.
            status_db = DatabaseConnector.get_db_instance()
            status_db.add_linkprediction_status(
                0, 0, 0, 'Prediction', 'Waiting', 'project_id', project_id)

            prediction_process.start()
        elif prediction_state.state == "Abort":
            print("ABORT PREDICTION PRCOESS")
            prediction_process.terminate()
            db_instance.add_linkprediction_status(
                0, 0, 0, "Prediction", "Failed", 'project_id', project_id)
    except Exception as error:
        return ('Exception', error)

    return 'Prediction state altered!'
def get_prediction_status_by_project(project_id):  # noqa: E501
    """Return the latest stored prediction status of a project.

    Looks up the project's predicted network, fetches the last link
    prediction status recorded for it and wraps the record in a
    ``PredictionStatus``.

    :param project_id: ID of the current project.
    :type project_id:

    :return: A ``PredictionStatus``, or ``('Exception', error)`` if any step
        raises.
    :rtype: PredictionStatus
    """
    # Record fields in the positional order PredictionStatus receives them.
    status_fields = ('status_id', 'log_timestamp', 'current_step',
                     'max_steps', 'process_step', 'status_value')
    try:
        db_instance = DatabaseConnector.get_db_instance()
        network = db_instance.get_predicted_network_by_id(
            'project_id', project_id)
        network_id = network['predicted_network_id']

        latest = db_instance.get_last_linkprediction_status_by_id(
            'predicted_network_id', network_id)

        return PredictionStatus(*(latest[field] for field in status_fields))
    except Exception as error:
        return ('Exception', error)
def get_original_network_by_project(project_id):  # noqa: E501
    """Return the original network of a project with node and edge counts.

    The counts are the lengths of the node and edge collections fetched for
    the network.

    :param project_id: ID of the current project.
    :type project_id:

    :return: An ``OriginalNetwork``, or ``('Exception', error)`` if any step
        raises.
    :rtype: OriginalNetwork
    """
    try:
        db = DatabaseConnector.get_db_instance()
        network = db.get_original_network_by_id('project_id', project_id)
        node_records = db.get_nodes_by_id('original_network_id',
                                          network['original_network_id'])
        edge_records = db.get_edges_of_original_network_by_id(
            'original_network_id', network['original_network_id'])
        return OriginalNetwork(
            id=network['original_network_id'],
            designation=network['designation'],
            directed=network['directed'],
            multigraph=network['multigraph'],
            node_count=len(node_records),
            edge_count=len(edge_records),
        )
    except Exception as error:
        return ('Exception', error)
def occupation_similarity(u_attr: str, v_attr: str):
    """Score the similarity of two occupation attribute values.

    Both values are parsed with ``_get_occupation_format``; a two-element
    format is resolved through ``get_occupations_by_column``, anything else
    through ``get_occupation_by_hierarchy``. ``_choose_correct_hierarchy``
    then picks one record per side.

    Scoring:
      * ``0`` if either record has ``job_id == 0``
      * ``1`` if both records share the same ``job_id``
      * otherwise ``0.3`` for an equal ``field_of_activity``, plus ``0.3``
        for an equal ``subject_area``, plus ``0.4`` times the competence
        similarity of the two records.

    :param u_attr: Occupation attribute of the first node.
    :param v_attr: Occupation attribute of the second node.
    :return: The similarity score.
    :raises ValueError: If either attribute cannot be parsed.
    """
    db = DatabaseConnector.get_db_instance()

    def _lookup(occupation_format):
        # Two-element formats address a column; others address a hierarchy
        # entry, which is wrapped in a list so both paths return a list.
        if len(occupation_format) == 2:
            return db.get_occupations_by_column(*occupation_format)
        return [db.get_occupation_by_hierarchy(*occupation_format)]

    u_format = _get_occupation_format(u_attr)
    v_format = _get_occupation_format(v_attr)
    if u_format is None or v_format is None:
        raise ValueError('Occupation formatting is wrong!')

    u_candidates = _lookup(u_format)
    v_candidates = _lookup(v_format)
    u_record, v_record = _choose_correct_hierarchy(u_candidates,
                                                   v_candidates)

    if u_record['job_id'] == 0 or v_record['job_id'] == 0:
        return 0
    if u_record['job_id'] == v_record['job_id']:
        return 1

    score = 0
    for shared_key in ('field_of_activity', 'subject_area'):
        if u_record[shared_key] == v_record[shared_key]:
            score += 0.3
    competence_part = _get_competence_similarity(
        u_competences=u_record['competences'],
        v_competences=v_record['competences'])
    score += 0.4 * competence_part
    return score
    def _save_predicted_graph(self, predicted_graph_train, predicted_graph_test):
        """Store the predicted graph of this project in the database.

        The test graph is stored when ``self.validation`` is truthy,
        otherwise the train graph. The monitor is notified with
        ``'Processing'`` before and ``'Finished'`` after the save.

        :param predicted_graph_train: Predicted graph built from the train split.
        :param predicted_graph_test: Predicted graph built from the test split.
        """
        self.monitor.notify('Processing')

        db_instance = DatabaseConnector.get_db_instance()
        network_record = db_instance.get_predicted_network_by_id(
            'project_id', self.project_id)
        graph_to_store = (predicted_graph_test
                          if self.validation else predicted_graph_train)
        save_predicted_graph_to_db(graph_to_store,
                                   network_record['predicted_network_id'])

        self.monitor.notify('Finished')
def get_predictors_by_project(project_id):  # noqa: E501
    """Return the available and the selected predictors of a project.

    Available predictors are the standard network features. Features of type
    ``'Social Theory with exogenous Attributes'`` get their
    ``attribute_weightings`` parameter replaced by one ``{'attribute': name,
    'value': 1}`` entry per node attribute of the project that has a
    non-``None`` entry in ``ATTR_HANDLING``. The returned evaluation setup
    uses fixed defaults (seed 42, no validation, train ratio 0.7).

    :param project_id: ID of the current project.
    :type project_id:

    :return: A ``Predictors`` object, or ``('Exception', error)`` if any
        step raises.
    :rtype: Predictors
    """
    try:
        db_instance = DatabaseConnector.get_db_instance()
        network = db_instance.get_predicted_network_by_id(
            'project_id', project_id)
        predicted_network_id = network['predicted_network_id']
        standard_features = db_instance.get_standard_network_features()

        # Keep only the node attributes that have a configured handler.
        usable_attributes = []
        for attr in db_instance.get_distinct_node_attributes_by_id(
                'project_id', project_id):
            if attr not in ATTR_HANDLING or ATTR_HANDLING[attr] is None:
                continue
            usable_attributes.append(attr)
        attribute_weightings = [{
            'attribute': attr,
            'value': 1
        } for attr in usable_attributes]

        available = []
        for feature in standard_features:
            category = feature['feature_type']
            if category == 'Social Theory with exogenous Attributes':
                # The same weighting list object is shared by every such feature.
                feature['parameters'][
                    'attribute_weightings'] = attribute_weightings
            available.append(
                Predictor(id=feature['standard_feature_id'],
                          designation=feature['designation'],
                          category=category,
                          parameters=feature['parameters']))

        selected = [
            Predictor(id=feature['selected_feature_id'],
                      designation=feature['designation'],
                      category=feature['feature_type'],
                      parameters=feature['parameters'])
            for feature in db_instance.get_selected_network_features_by_id(
                id_type='predicted_network_id',
                reference_id=predicted_network_id)
        ]

        default_setup = EvaluationSetup(random_seed=42,
                                        with_validation=False,
                                        train_sampling_ratio=0.7)
        return Predictors(available_predictors=available,
                          selected_predictors=selected,
                          evaluation_setup=default_setup)
    except Exception as error:
        return ('Exception', error)
def delete_project_by_id(project_id):  # noqa: E501
    """Delete the project with the given ID.

    :param project_id: ID of the project to delete.
    :type project_id:

    :return: A confirmation message, or ``('Exception', error)`` if the
        deletion raises.
    """
    try:
        DatabaseConnector.get_db_instance().delete_project(project_id)
        confirmation = f'Deleted project with ID {project_id}'
        return confirmation
    except Exception as error:
        return ('Exception', error)
def delete_predicted_network_by_project(project_id):  # noqa: E501
    """Delete the predicted network that belongs to a project.

    :param project_id: ID of the current project.
    :type project_id:

    :return: A confirmation message, or ``('Exception', error)`` if the
        deletion raises.
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        connector.delete_predicted_network_by_id('project_id', project_id)
        confirmation = f'Deleted predicted network with ProjectID {project_id}'
        return confirmation
    except Exception as error:
        return ('Exception', error)
def create_prediction_setup(project_id, body):  # noqa: E501
    """Store the selected predictors and the evaluation setup of a project.

    Every entry of ``body['selected_predictors']`` is added to the project's
    predicted network. ``body['evaluation_setup']`` is then written as the
    project's evaluation result (JSON-encoded). If the previously stored
    result already holds ``train_results``, its ``train_results``,
    ``test_results`` and ``timestamp`` are copied into the new setup first.
    Note that this copy mutates ``body['evaluation_setup']`` in place.

    :param project_id: ID of the current project.
    :type project_id:
    :param body: Prediction setup with the keys ``selected_predictors`` and
        ``evaluation_setup``.
    :type body: dict | bytes

    :return: ``'Created prediction!'``, or ``('Exception', error)`` if any
        step raises.
    """
    # Entries of an earlier run that must survive a setup change.
    carried_keys = ('train_results', 'test_results', 'timestamp')
    try:
        db_instance = DatabaseConnector.get_db_instance()
        network = db_instance.get_predicted_network_by_id(
            'project_id', project_id)
        predicted_network_id = network['predicted_network_id']

        for predictor in body['selected_predictors']:
            db_instance.add_selected_network_feature_to_project(
                designation=predictor["designation"],
                feature_type=predictor["category"],
                parameters=predictor["parameters"],
                predicted_network_id=predicted_network_id)

        previous = db_instance.get_evaluation_result_by_id(
            'project_id', project_id)
        evaluation_setup = body['evaluation_setup']
        previous_data = previous['result_data']
        if 'train_results' in previous_data:
            for key in carried_keys:
                evaluation_setup[key] = previous_data[key]

        serialized_setup = json.dumps(evaluation_setup)
        db_instance.add_or_update_evaluation_result(project_id,
                                                    serialized_setup)
        return 'Created prediction!'
    except Exception as error:
        return ('Exception', error)
def get_project_by_id(project_id):  # noqa: E501
    """Return the project with the given ID.

    The returned ``Project`` is built without a ``predicted_network_id``.

    :param project_id: ID of the project to return.
    :type project_id:

    :return: A ``Project``, or ``('Exception', error)`` if the lookup raises.
    :rtype: Project
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        record = connector.get_project_by_id('project_id', project_id)
        # predicted_network_id is intentionally not passed on here.
        return Project(
            id=record['project_id'],
            designation=record['designation'],
            description=record['description'],
            original_network_id=record['original_network_id'],
        )
    except Exception as error:
        return ('Exception', error)
def get_predicted_network_by_project(project_id):  # noqa: E501
    """Return the predicted network of a project.

    The stored predicted graph is loaded via ``load_predicted_graph_from_db``
    and its ``nodes``, ``links`` and ``information`` entries are wrapped in
    a ``PredictedNetwork``.

    :param project_id: ID of the current project.
    :type project_id:

    :return: A ``PredictedNetwork``, or ``('Exception', error)`` if any step
        raises.
    :rtype: PredictedNetwork
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        network = connector.get_predicted_network_by_id(
            'project_id', project_id)
        graph_data = load_predicted_graph_from_db(
            network['predicted_network_id'])

        return PredictedNetwork(
            nodes=graph_data['nodes'],
            links=graph_data['links'],
            information=graph_data['information'],
        )
    except Exception as error:
        return ('Exception', error)
def get_evaluation_results_by_project(project_id):  # noqa: E501
    """Return the stored evaluation results of a project.

    The ``timestamp`` of the result is taken from the stored result data
    when present and is ``None`` otherwise.

    :param project_id: ID of the current project.
    :type project_id:

    :return: An ``EvaluationResults``, or ``('Exception', error)`` if any
        step raises.
    :rtype: EvaluationResults
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        stored = connector.get_evaluation_result_by_id(
            'project_id', project_id)
        result_data = stored['result_data']

        timestamp = (result_data['timestamp']
                     if 'timestamp' in result_data else None)

        return EvaluationResults(timestamp=timestamp, results=result_data)
    except Exception as error:
        return ('Exception', error)
def delete_prediction_setup(project_id):  # noqa: E501
    """Delete the selected prediction features of a project.

    The features currently selected for the project's predicted network are
    fetched first; if there are any, they are deleted with a single call
    keyed by the predicted network ID.

    :param project_id: ID of the current project.
    :type project_id:

    :return: ``'Deleted prediction features!'``, or ``('Exception', error)``
        if any step raises.
    """
    try:
        db_instance = DatabaseConnector.get_db_instance()
        predicted_network = db_instance.get_predicted_network_by_id(
            'project_id', project_id)
        predicted_network_id = predicted_network['predicted_network_id']

        prediction_features = db_instance.get_selected_network_features_by_id(
            'predicted_network_id', predicted_network_id)
        # The delete carries no per-feature identifier, only the network ID,
        # so repeating it once per feature (as before) issued identical
        # requests. One call is enough; it is skipped when nothing is
        # selected, matching the previous behaviour for that case.
        # NOTE(review): assumes the delete removes every feature of the
        # network in one go - confirm against the connector.
        if prediction_features:
            db_instance.delete_selected_network_features_by_id(
                'predicted_network_id', predicted_network_id)
        return 'Deleted prediction features!'
    except Exception as error:
        return ('Exception', error)
def get_projects():  # noqa: E501
    """Return all available projects.

    :return: One ``Project`` per stored project record, or
        ``('Exception', error)`` if any step raises.
    :rtype: List[Project]
    """
    try:
        connector = DatabaseConnector.get_db_instance()
        return [
            Project(
                id=record['project_id'],
                designation=record['designation'],
                description=record['description'],
                original_network_id=record['original_network_id'],
                predicted_network_id=record['predicted_network_id'],
            ) for record in connector.get_projects()
        ]
    except Exception as error:
        return ('Exception', error)
 def _update(self, data):
     """Write one link prediction status record to the database.

     :param data: Mapping with the keys ``process_id``, ``step_index``,
         ``max_steps``, ``name``, ``status``, ``id_type`` and
         ``reference_id``; the values are passed on positionally in that
         order.
     """
     store_status = DatabaseConnector.get_db_instance().add_linkprediction_status
     status_fields = ('process_id', 'step_index', 'max_steps', 'name',
                      'status', 'id_type', 'reference_id')
     store_status(*(data[field] for field in status_fields))
def create_project(**kwargs):  # noqa: E501
    """Create a project from an uploaded network file.

    The file is parsed before the database is touched. Afterwards the
    project, its original and predicted network, the nodes, edges and node
    attributes are inserted, the original graph is stored as the initial
    predicted graph and a default evaluation setup is saved.

    Recognised keyword arguments:

    :param body: Form fields: ``designation``, ``description``,
        ``network_designation``, ``network_directed``,
        ``network_multigraph`` and ``file_format``.
    :param network_file: Binary object which contains the network file with
        a standard network format.
    :param additional_network_file: Binary object which contains an
        additional network file (especially used for CSV imports).

    :return: The created project.
    :rtype: Project
    :raises http_exceptions.InternalServerError: If the file cannot be
        parsed or the data cannot be written to the database.
    """
    body = kwargs.get('body')
    file = kwargs.get('network_file')
    additional_file = kwargs.get('additional_network_file')

    # Parse the upload first so a broken file never reaches the database.
    try:
        network_file = NetworkFile(body.get('file_format'), file,
                                   additional_file)
        node_list = network_file.parse_nodes()
    except Exception:
        logging.exception("Exception while handling the input file")
        raise http_exceptions.InternalServerError(
            description=
            'Something went wrong! Please check if your network file is correct.'
        )

    try:
        db = DatabaseConnector.get_db_instance()
        designation = body.get('designation')
        description = body.get('description')
        network_designation = body.get('network_designation')

        project_id = db.add_project(designation=designation,
                                    description=description)
        original_network_id = db.add_original_network_to_project(
            designation=network_designation,
            directed=body.get('network_directed'),
            multigraph=body.get('network_multigraph'),
            project_id=project_id)
        predicted_network_id = db.add_predicted_network_to_project(
            designation=network_designation, project_id=project_id)

        nodes = db.add_nodes(node_list, original_network_id,
                             predicted_network_id)
        db.add_edges_to_original_network(network_file.parse_edges(nodes),
                                         original_network_id)
        for node in nodes:
            # node[0] is handed to the parser, node[1] to the database.
            attributes = network_file.parse_attributes(node[0])
            if not attributes:
                continue
            db.add_node_attributes(attributes, node[1])

        # The untouched original graph is the first "predicted" graph.
        original_graph = build_original_graph('project_id', project_id)
        save_predicted_graph_to_db(original_graph.copy(),
                                   predicted_network_id)

        default_evaluation_setup = {
            "random_seed": 42,
            "with_validation": False,
            "train_sampling_ratio": 0.8,
            "test_sampling_ratio": 0.9,
            "ml_preprocessing": False,
        }
        db.add_or_update_evaluation_result(project_id,
                                           default_evaluation_setup)

        return Project(id=project_id,
                       designation=designation,
                       description=description,
                       original_network_id=original_network_id,
                       predicted_network_id=predicted_network_id)
    except Exception:
        logging.exception(
            "Exception occured while inserting data in the database")
        raise http_exceptions.InternalServerError(
            description=
            'Something went wrong! The input file seems to be wrong and the data could not be loaded into the database.'
        )
    def predict(self):
        """Run the complete link prediction pipeline for this project.

        Steps, in order: preparation (build, flatten and sample the graph,
        derive missing edges and labels), topology features, social theory
        features, classification, assembling and saving the predicted graph,
        and evaluation. The evaluation results and a timestamp are written
        into the project's stored evaluation data.

        The monitor is set to pending at the start and to finished at the
        end. Any exception is printed and the monitor is set to failed;
        nothing is re-raised.
        """
        try:
            self.monitor.pending()

            # PIPELINE - PREPARATION
            truth_h = build_original_graph('project_id', self.project_id,
                                           'hierarchical')
            truth_f = hierarchical_to_flat(truth_h)
            train_f, test_f = self._sample_graphs(truth_f)
            train_missing, test_missing = self._get_missing_edges(
                truth_f, train_f, test_f)
            train_set, test_set = self._prepare_labels(
                truth_f, test_f, train_missing, test_missing)

            train_h = flat_to_hierarchical(train_f)
            predicted_train = train_h.copy()
            # The test-side graphs only exist when validation is enabled.
            if self.validation:
                test_h = flat_to_hierarchical(test_f)
                predicted_test = test_h.copy()
            else:
                test_h = None
                predicted_test = None

            # PIPELINE - TOPOLOGY
            topo_train, topo_test = self._topology_pipeline(
                truth_f, train_f, test_f, train_set, test_set)

            # PIPELINE - SOCIALTHEORY
            theory_train, theory_test = self._socialtheory_pipeline(
                train_h, test_h, train_set, test_set, predicted_train,
                predicted_test)

            # PIPELINE - CLASSIFICATION
            features_train = pd.concat([topo_train, theory_train], axis=1)
            features_test = (pd.concat([topo_test, theory_test], axis=1)
                             if topo_test is not None else None)

            class_train, class_test = self._classification_pipeline(
                features_train, features_test)

            # CREATE PREDICTED GRAPH
            self._add_topology_to_predicted_graph(
                topo_train, topo_test, predicted_train, predicted_test)
            self._add_classification_to_predicted_graph(
                train_set.node_pairs, class_train, class_test,
                predicted_train, predicted_test)

            # Final frames: label column, then node pairs, then classifier output.
            train_scored = pd.concat([train_set.node_pairs, class_train],
                                     axis=1)
            train_final = pd.concat([train_set.label, train_scored], axis=1)

            test_final = None
            if class_test is not None:
                test_scored = pd.concat([test_set.node_pairs, class_test],
                                        axis=1)
                test_final = pd.concat([test_set.label, test_scored], axis=1)

            self._save_predicted_graph(predicted_train, predicted_test)

            # PIPELINE - EVALUATION
            results_train, results_test = self._create_results(
                train_final, test_final)

            db_instance = DatabaseConnector.get_db_instance()
            stored_evaluation = db_instance.get_evaluation_result_by_id(
                'project_id', self.project_id)
            evaluation_data = stored_evaluation['result_data']
            evaluation_data['timestamp'] = datetime.datetime.now().timestamp()
            evaluation_data['train_results'] = results_train
            evaluation_data['test_results'] = results_test
            db_instance.add_or_update_evaluation_result(self.project_id,
                                                        evaluation_data)

            self.monitor.finished()
        except Exception as e:
            print(e)
            self.monitor.failed()
def build_original_graph(id_type, reference_id, return_type='hierarchical'):
    """Build the original network of a project as a directed graph.

    Two representations are available:

    * flat - one graph edge per stored edge, marked ``predicted=False``.
    * hierarchical - nodes carry their attributes; an edge and its reverse
      edge are merged into a single graph edge whose ``edges`` attribute
      lists the directed sub-edges.

    id_type: possible values = ['original_network_id', 'project_id']
    reference_id: value of the id
    return_type:
        hierarchical -> returns original_graph_hierarchical
        flat -> returns original_graph_flat
        both -> returns (original_graph_hierarchical, original_graph_flat)

    On any error while reading or building, the error is printed and the
    tuple ``('Exception', error)`` is returned instead of a graph.

    Raises ValueError if id_type or return_type is not one of the values
    listed above.
    """
    if id_type not in ['original_network_id', 'project_id']:
        raise ValueError('Parameter id_type is not valid!')
    if return_type not in ['hierarchical', 'flat', 'both']:
        raise ValueError('Parameter return_type is not valid!')

    want_hierarchical = return_type in ('hierarchical', 'both')
    want_flat = return_type in ('flat', 'both')

    try:
        db = DatabaseConnector.get_db_instance()
        nodes = db.get_nodes_by_id(id_type, reference_id)
        # Database node id -> id of the node inside the network file.
        uuid_to_id = {
            node['node_id']: node['node_network_id']
            for node in nodes
        }
        edges = db.get_edges_of_original_network_by_id(id_type, reference_id)
        original_graph_flat = DiGraph()
        original_graph_hierarchical = DiGraph()

        for node in nodes:
            if want_hierarchical:
                node_attributes = db.get_node_attributes_by_id(
                    'node_id', node['node_id'])
                kv_attributes = {
                    record['attribute_name']: record['attribute_value']
                    for record in node_attributes
                }
                original_graph_hierarchical.add_node(
                    node_for_adding=node['node_network_id'],
                    identifiers={**node},
                    **kv_attributes)
            if want_flat:
                original_graph_flat.add_node(
                    node_for_adding=node['node_network_id'])

        if want_hierarchical:
            # Edges already folded into an earlier edge as its reverse
            # direction. They are tracked by identity instead of being
            # removed from the list under iteration: removing during the
            # loop skipped the following edge (e.g. after a self-loop) and
            # could raise ValueError when a reverse edge was matched twice.
            merged_edge_ids = set()
            for edge in edges:
                if id(edge) in merged_edge_ids:
                    continue
                source_id = uuid_to_id[edge['source_node']]
                target_id = uuid_to_id[edge['target_node']]
                sub_edges = [{
                    'source': source_id,
                    'target': target_id,
                    'predicted': False
                }]
                # An edge is never its own reverse, so a self-loop keeps a
                # single sub-edge.
                rev_edge = next(
                    (other for other in edges
                     if other is not edge and
                     other['target_node'] == edge['source_node'] and
                     other['source_node'] == edge['target_node']), None)
                if rev_edge is not None:
                    sub_edges.append({
                        'source': target_id,
                        'target': source_id,
                        'predicted': False
                    })
                    merged_edge_ids.add(id(rev_edge))

                # Record the network id -> database id mapping of both
                # endpoints on the edge record itself.
                edge[source_id] = edge['source_node']
                edge[target_id] = edge['target_node']

                original_graph_hierarchical.add_edge(
                    u_of_edge=source_id,
                    v_of_edge=target_id,
                    identifiers={**edge},
                    edges=sub_edges,
                    edge_color=COLOR_ORIGINAL_ONLY)

        if want_flat:
            for edge in edges:
                source_id = uuid_to_id[edge['source_node']]
                target_id = uuid_to_id[edge['target_node']]

                edge[source_id] = edge['source_node']
                edge[target_id] = edge['target_node']

                original_graph_flat.add_edge(
                    u_of_edge=source_id,
                    v_of_edge=target_id,
                    identifiers={**edge},
                    edge_color=COLOR_ORIGINAL_ONLY,
                    predicted=False)

        if return_type == 'flat':
            return original_graph_flat
        if return_type == 'hierarchical':
            return original_graph_hierarchical
        return (original_graph_hierarchical, original_graph_flat)
    except Exception as error:
        print(error)
        return ('Exception', error)