Code Example #1
import sys

import networkx as nx
import obonet


def load(obo_file, logger=None):
    """
    Load OBO file into a networkx graph.

    :param obo_file: OBO definition file.
    :param logger: Python `logging` logger instance.
    :return: `networkx.MultiDiGraph`
    """
    try:
        hpo_network = obonet.read_obo(obo_file)
        #return nx.MultiDiGraph(hpo_network.subgraph(['HP:0000118'] + list(nx.ancestors(hpo_network, 'HP:0000118'))))
    except (FileNotFoundError, PermissionError) as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        sys.exit(1)

    # roots for non-phenotype nodes
    non_phenotypes = {
        'mortality_aging': 'HP:0040006',
        'mode_of_inheritance': 'HP:0000005',
        'clinical_modifier': 'HP:0012823',
        'frequency': 'HP:0040279',
        'clinical_course': 'HP:0031797',
    }

    # remove non-phenotype branches
    for _, hpo_id in non_phenotypes.items():
        if hpo_id in hpo_network.nodes:
            children = nx.ancestors(hpo_network, hpo_id)
            hpo_network.remove_nodes_from([hpo_id] + list(children))

    return hpo_network
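A minimal usage sketch for load (the hp.obo path is a placeholder; assumes obonet and networkx are installed):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('phenopy')

# hp.obo can be downloaded from https://hpo.jax.org/app/download/ontology
hpo_network = load('hp.obo', logger=logger)  # placeholder path
print(f'{len(hpo_network)} HPO terms remain after pruning non-phenotype branches')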
Code Example #2
import sys
from configparser import NoOptionError, NoSectionError

# `config`, `logger`, `generate_annotated_hpo_network`, `parse_input`, and
# `predict_likelihood_moldx` come from the phenopy package (not shown in this excerpt).


def likelihood_moldx(input_file, output_file=None, k_phenotype_groups=1000):
    """
    :param input_file: The file path to a file containing three columns. [ID\tkey=value\thpoid|hpoid|hpoid]
    :param output_file: The file path to an output file containing the predicted probabilities.
    :param k_phenotype_groups: The number of phenotype groups to use for encoding phenotypes. The CLI version of phenopy allows one of [1000, 1500].
    """
    try:
        obo_file = config.get('hpo', 'obo_file')
    except (NoSectionError, NoOptionError):
        logger.critical(
            'No HPO OBO file found in the configuration file. See "hpo:obo_file" parameter.'
        )
        sys.exit(1)
    try:
        disease_to_phenotype_file = config.get('hpo',
                                               'disease_to_phenotype_file')
    except (NoSectionError, NoOptionError):
        logger.critical(
            'No HPO annotated dataset file found in the configuration file.'
            ' See "hpo:disease_to_phenotype_file" parameter.')
        sys.exit(1)

    logger.info(f'Loading HPO OBO file: {obo_file}')
    hpo_network, alt2prim, _ = \
        generate_annotated_hpo_network(obo_file,
                                       disease_to_phenotype_file,
                                       )

    # parse input records
    input_records = parse_input(input_file, hpo_network, alt2prim)
    record_ids = [record["record_id"] for record in input_records]
    phenotypes = [record["terms"] for record in input_records]

    # predict likelihood of molecular diagnosis
    positive_probabilities = predict_likelihood_moldx(
        phenotypes,
        phenotype_groups=None,
        hpo_network=hpo_network,
        alt2prim=alt2prim,
        k_phenotype_groups=k_phenotype_groups,
    )

    if output_file is None:
        output_file = "phenopy.likelihood_moldx.txt"
    try:
        with open(output_file, "w") as f:
            for sample_id, probability in zip(record_ids,
                                              positive_probabilities):
                f.write(f"{sample_id}\t{probability}\n")
    except IOError:
        sys.exit("Something went wrong writing the probabilities to file")
Code Example #3
import sys

import pandas as pd

# `remove_parents` comes from the phenopy package (not shown in this excerpt).


def export_phenotype_hpoa_with_no_parents(phenotype_hpoa_file,
                                          phenotype_hpoa_no_parents_file,
                                          hpo_network,
                                          logger=None):
    """
    Load the disease-to-phenotype annotations from https://hpo.jax.org/app/download/annotation.
    For each disease, filter out HPO terms that are parents of other annotated terms.
    Write the result to phenotype_hpoa_no_parents_file.

    :param phenotype_hpoa_file: Phenotypes to diseases file.
    :param phenotype_hpoa_no_parents_file: Phenotypes to diseases file with parents removed.
    :param hpo_network: The HPO networkx object.
    :param logger: Python `logging` logger instance.
    :return: None
    """
    try:
        with open(phenotype_hpoa_file, 'r') as tsv_fh:
            # skip the comment lines
            for _ in range(4):
                next(tsv_fh)
            df = pd.read_csv(
                tsv_fh,
                sep='\t',
            )
    except (FileNotFoundError, PermissionError) as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        sys.exit(1)

    no_parents_df = df.copy()
    for disease_id, annotations in df.groupby('#DatabaseID'):
        termlist = [
            node for node in annotations['HPO_ID'].tolist()
            if node in hpo_network.nodes()
        ]
        termlist = remove_parents(termlist, hpo_network)
        parent_idx = annotations.loc[~annotations['HPO_ID'].isin(termlist)].index
        no_parents_df.drop(parent_idx, inplace=True)

    try:
        no_parents_df.to_csv(phenotype_hpoa_no_parents_file,
                             sep='\t',
                             index=False)
    except PermissionError as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        sys.exit(1)
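A usage sketch, assuming hpo_network and logger were created as in Code Example #1 and phenotype.hpoa was downloaded from https://hpo.jax.org/app/download/annotation (paths are placeholders):

export_phenotype_hpoa_with_no_parents('phenotype.hpoa',
                                      'phenotype.hpoa.no_parents.tsv',
                                      hpo_network,
                                      logger=logger)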
Code Example #4
import csv
import sys

# `logger` and `remove_parents` come from the phenopy package (not shown in this excerpt).


def parse_input(input_file, hpo_network, alt2prim):
    """
    Parse input file.
    """
    try:
        with open(input_file, 'r') as input_fh:
            reader = csv.reader(filter(lambda l: not l.startswith('#'),
                                       input_fh),
                                delimiter='\t')
            records = []
            for line in reader:
                # process terms with convert and filter first
                terms = []
                for term_id in line[2].split('|'):
                    # convert alternate ids to primary
                    if term_id in alt2prim:
                        term_id = alt2prim[term_id]
                    # filtering terms not in the hpo network
                    if term_id not in hpo_network.nodes():
                        continue
                    terms.append(term_id)

                record = {
                    'record_id': line[0],
                    'terms': remove_parents(terms, hpo_network),
                    'weights': {},
                    **dict(item.split('=')
                           for item in line[1].split(';')
                           if line[1] != '.'),
                }

                # assign new weights here ex. Sex weights (similar to the age weights).
                records.append(record)

    except (FileNotFoundError, PermissionError) as e:
        logger.critical(
            f'Provided input file could not be loaded or does not exist: {e}')
        sys.exit(1)
    except ValueError:
        logger.critical(
            f'Unable to parse input file, invalid line number: {reader.line_num}:{input_file}'
        )
        sys.exit(1)

    return records
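A sketch of the input format parse_input expects, inferred from the parser above: tab-separated record id, semicolon-separated key=value pairs (or '.' when there are none), and pipe-separated HPO ids. File contents are illustrative; assumes hpo_network and alt2prim were produced by generate_annotated_hpo_network (see Code Example #2):

with open('input.txt', 'w') as f:
    f.write('SAMPLE_1\t.\tHP:0001263|HP:0004322\n')
    f.write('SAMPLE_2\tage=8.0\tHP:0001263\n')

records = parse_input('input.txt', hpo_network, alt2prim)
# e.g. [{'record_id': 'SAMPLE_1', 'terms': [...], 'weights': {}}, ...]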
Code Example #5
File: experiment.py (project: arvkevi/phenopy)
import os

import requests

# `OMIM_API_URL`, `config`, and `logger` come from the phenopy package (not shown in this excerpt).


def request_mimid_info(mimid):
    """
    request mimid description from OMIM
    """
    access = "entry?"
    api_key = os.getenv("OMIM_API_KEY")
    if api_key is None:
        api_key = config.get("omim", "omim_api_key")
    payload = {
        "mimNumber": mimid,
        "include": "text",
        "format": "json",
        "apiKey": api_key,
    }

    r = requests.get(OMIM_API_URL + access, params=payload)
    if r.status_code == 200:
        return r
    else:
        logger.critical(
            f"OMIM request failed with status {r.status_code}. "
            "Please set the omim_api_key in your phenopy.ini config file.")
        return None
Code Example #6
import csv
import sys

# `parse` and `remove_parents` come from the phenopy package (not shown in this excerpt).


def read_records_file(records_file,
                      no_parents=False,
                      hpo_network=None,
                      logger=None):
    """
    Parse input file for patient descriptions into an array of dictionaries
    :param records_file: path to the records file to parse
    :param no_parents: remove parent nodes
    :param hpo_network: hpo network to use in removing parents
    :param logger: logger object to use in reporting errors
    :return: list of dictionaries
    """
    try:
        with open(records_file) as records_fh:
            reader = csv.reader(records_fh, delimiter='\t')
            records = []
            for line in reader:
                if line[0].startswith('#'):
                    continue
                dict_ = {
                    'sample': line[0],
                    'age': parse(line[1], what='age'),
                    'gender': parse(line[1], what='sex'),
                    'terms': parse(line[2], what='HPO')
                }

                if no_parents is True and hpo_network is not None:
                    dict_['terms'] = remove_parents(dict_['terms'],
                                                    hpo_network)
                records.append(dict_)
        return records
    except (FileNotFoundError, PermissionError) as e:
        if logger is not None:
            logger.critical(e)
        else:
            sys.stderr.write(str(e))
        sys.exit(1)
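A sketch of the records-file format, inferred from the parser above: tab-separated sample id, an attribute column handed to parse() for age and sex, and an HPO term column; '#' lines are skipped. The exact attribute syntax accepted by parse() is an assumption here:

with open('records.txt', 'w') as f:
    f.write('#sample\tinfo\tterms\n')
    f.write('CASE_1\tage=8.0;sex=female\tHP:0001263|HP:0004322\n')

records = read_records_file('records.txt', no_parents=True,
                            hpo_network=hpo_network, logger=logger)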
Code Example #7
import os

# `cache`, `config`, `load_obo`, `logger`, `process`, and `restore` come from
# the phenopy package (not shown in this excerpt).


def _load_hpo_network(obo_file,
                      terms_to_genes,
                      annotations_count,
                      custom_annotations_file,
                      hpo_network_file=None):
    """
    Load and process phenotypes to genes and obo files if we don't have a processed network already.
    """
    # We instruct the user that they can set hpo_network_file in .phenopy/phenopy.ini
    # The default value is empty string, so check for that first.
    if hpo_network_file is None:
        hpo_network_file = config.get('hpo', 'hpo_network_file')

    if not os.path.exists(hpo_network_file):
        # load and process hpo network
        logger.info(f'Loading HPO OBO file: {obo_file}')
        hpo_network = load_obo(obo_file, logger=logger)
        hpo_network = process(hpo_network,
                              terms_to_genes,
                              annotations_count,
                              custom_annotations_file,
                              logger=logger)

        # save a cache of the processed network
        cache(hpo_network, hpo_network_file)
    # the default hpo_network.pickle file was found
    else:
        try:
            hpo_network = restore(hpo_network_file)
        except (FileNotFoundError, PermissionError, IsADirectoryError) as e:
            logger.critical(
                f'{hpo_network_file} is not a valid path to a pickled hpo_network file.\n'
                'In your $HOME/.phenopy/phenopy.ini, please set '
                'hpo_network_file=/path/to/hpo_network.pickle OR leave it empty, '
                'which is the default.'
            )
            raise e
    return hpo_network
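A sketch of the $HOME/.phenopy/phenopy.ini entries this function relies on (paths are placeholders; leaving hpo_network_file empty triggers the load-and-cache branch):

[hpo]
obo_file = /path/to/hp.obo
hpo_network_file = /path/to/hpo_network.pickle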
Code Example #8
File: likelihood.py (project: arvkevi/phenopy)
import joblib
from configparser import NoOptionError, NoSectionError

# `config`, `logger`, `generate_annotated_hpo_network`, `read_phenotype_groups`,
# and `encode_phenotypes` come from the phenopy package (not shown in this excerpt).


def predict_likelihood_moldx(phenotypes,
                             phenotype_groups=None,
                             hpo_network=None,
                             alt2prim=None,
                             k_phenotype_groups=1000):
    """
    Predicts the likelihood of molecular diagnosis given a set of phenotypes.
    :param phenotypes: A list of phenotypes or a list of lists of phenotypes.
    :param phenotype_groups: <optional> A dictionary of phenotype to phenotype group mappings.
    :param hpo_network: <optional> The hpo networkx object.
    :param alt2prim: <optional> A dictionary of alternate phenotype ids to primary phenotype ids. (must be given if hpo_network is provided)
    :param k_phenotype_groups: <optional> An integer that represents the number of phenotype groups to use.
    :return: An array of probabilities for the positive class.
    """
    # fall back to the configured OBO and annotation files if no network was provided
    if hpo_network is None or alt2prim is None:
        try:
            obo_file = config.get('hpo', 'obo_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO OBO file found in the configuration file. See "hpo:obo_file" parameter.'
            )
            raise
        try:
            disease_to_phenotype_file = config.get(
                'hpo', 'disease_to_phenotype_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO annotated dataset file found in the configuration file.'
                ' See "hpo:disease_to_phenotype_file" parameter.')
            raise
        logger.info(f'Loading HPO OBO file: {obo_file}')
        hpo_network, alt2prim, _ = \
            generate_annotated_hpo_network(obo_file,
                                           disease_to_phenotype_file)
    if phenotype_groups is None:
        phenotype_groups = read_phenotype_groups()

    # validate k by probing one entry of the phenotype groups mapping
    try:
        phenotype_groups[list(phenotype_groups)[0]][f"k{k_phenotype_groups}"]
    except KeyError:
        logger.critical(
            "The value for k_phenotype_groups was not valid. Please use a valid k from the phenotype_groups dictionary."
        )
        raise

    encoded_phenotypes = encode_phenotypes(phenotypes,
                                           phenotype_groups,
                                           hpo_network,
                                           alt2prim,
                                           k=k_phenotype_groups)
    model = joblib.load(config['models']['likelihood.model'])
    probabilities = model.predict_proba(encoded_phenotypes)
    return probabilities[:, 1]
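A usage sketch; with no network arguments the function falls back to the configured OBO and annotation files, so only the phenotype lists are required:

phenotypes = [
    ['HP:0001263', 'HP:0004322'],  # record 1
    ['HP:0000252'],                # record 2
]
probabilities = predict_likelihood_moldx(phenotypes)
for terms, p in zip(phenotypes, probabilities):
    print(terms, round(float(p), 3))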
Code Example #9
import itertools
import sys
from configparser import NoOptionError, NoSectionError

# `config`, `logger`, `generate_annotated_hpo_network`, `parse_input`, `Scorer`,
# `half_product`, and `open_or_stdout` come from the phenopy package (not shown in this excerpt).


def score(input_file,
          output_file='-',
          records_file=None,
          annotations_file=None,
          custom_disease_file=None,
          ages_distribution_file=None,
          self=False,
          summarization_method='BMWA',
          scoring_method='HRSS',
          threads=1):
    """
    Scores the similarity of the provided HPO-annotated entries (see format below) against an HPO-annotated dataset. By
    default, scoring happens against diseases annotated by the HPO group. See https://hpo.jax.org/app/download/annotation.

    Phenopy also supports scoring the product of the provided entries (see "--product") or scoring against a custom records
    dataset (see "--records-file").

    :param input_file: File with HPO annotated entries, one per line (see format below).
    :param output_file: File path where to store the results. [default: - (stdout)]
    :param records_file: An entity-to-phenotype annotation file in the same format as "input_file". This file, if
     provided, is used to score entries in the "input_file" against entries here. [default: None]
    :param annotations_file: An entity-to-phenotype annotation file in the same format as "input_file". This file, if
     provided, is used to add information content to the network. [default: None]
    :param custom_disease_file: entity Annotation for ranking diseases/genes
    :param ages_distribution_file: Phenotypes age summary stats file containing phenotype HPO id, mean_age, and std.
     [default: None]
    :param self: Score entries in the "input_file" against itself.
    :param summarization_method: The method used to summarize the HRSS matrix. Supported values are best match average
    (BMA), best match weighted average (BMWA), and maximum (maximum). [default: BMWA]
    :param scoring_method: Either HRSS or Resnik. [default: HRSS]
    :param threads: Number of parallel processes to use. [default: 1]
    """

    try:
        obo_file = config.get('hpo', 'obo_file')
    except (NoSectionError, NoOptionError):
        logger.critical(
            'No HPO OBO file found in the configuration file. See "hpo:obo_file" parameter.'
        )
        sys.exit(1)
    if custom_disease_file is None:
        try:
            disease_to_phenotype_file = config.get(
                'hpo', 'disease_to_phenotype_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO annotated dataset file found in the configuration file.'
                ' See "hpo:disease_to_phenotype_file" parameter.')
            sys.exit(1)
    else:
        logger.info(
            f"using custom disease annotation file: {custom_disease_file}")
        disease_to_phenotype_file = custom_disease_file

    logger.info(f'Loading HPO OBO file: {obo_file}')
    hpo_network, alt2prim, disease_records = \
        generate_annotated_hpo_network(obo_file,
                                       disease_to_phenotype_file,
                                       annotations_file=annotations_file,
                                       ages_distribution_file=ages_distribution_file
                                       )

    # parse input records
    input_records = parse_input(input_file, hpo_network, alt2prim)

    # create an instance of the Scorer class
    try:
        scorer = Scorer(hpo_network,
                        summarization_method=summarization_method,
                        scoring_method=scoring_method)
    except ValueError as e:
        logger.critical(f'Failed to initialize scoring class: {e}')
        sys.exit(1)

    if self:
        score_records = input_records

        scoring_pairs = half_product(len(score_records), len(score_records))
    else:
        if records_file:
            score_records = parse_input(records_file, hpo_network, alt2prim)
        else:
            score_records = disease_records

        scoring_pairs = itertools.product(
            range(len(input_records)),
            range(len(score_records)),
        )

    results = scorer.score_records(input_records, score_records, scoring_pairs,
                                   threads)

    with open_or_stdout(output_file) as output_fh:
        output_fh.write('\t'.join(['#query', 'entity_id', 'score']))
        output_fh.write('\n')
        for result in results:
            output_fh.write('\t'.join(str(column) for column in result))
            output_fh.write('\n')
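A usage sketch of calling score from Python rather than the CLI (paths are placeholders; the input file follows the format read by parse_input above):

score('input.txt',
      output_file='scores.tsv',
      summarization_method='BMWA',
      scoring_method='HRSS',
      threads=4)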
Code Example #10
import re
import sys

import networkx as nx

# `logger`, `parse_input`, `make_age_distributions`, and `calculate_information_content`
# come from the phenopy package (not shown in this excerpt).


def annotate(hpo_network, phenotype_to_diseases, num_diseases_annotated, alt2prim,
             annotations_file=None, ages_distribution_file=None,
             phenotype_disease_frequencies=None):
    """
    Cleans the HPO network.

    Removes non-phenotype branches of the network, and merges all synonyms into one tag.

    :param hpo_network: `networkx.MultiDiGraph` to clean.
    :param phenotype_to_diseases: Dictionary mapping HPO terms to diseases.
    :param num_diseases_annotated: Number of diseases with HPO annotations.
    :param alt2prim: The dict of alternate terms to canonical terms.
    :param annotations_file: A list of custom annotation files, in the same format as tests/data/test.score-long.txt
    :param ages: age distributions object
    :param phenotype_disease_frequencies: dictionary of phenotype to disease frequencies
    :param logger: Python `logging` logger instance.
    :param ages_distribution_file: Path to phenotypes ages distribution file.
    :return: `networkx.MultiDiGraph`
    """

    # before calculating information content, load any custom annotations
    custom_annos = None
    if annotations_file is not None:
        custom_annos = {}
        for record in parse_input(annotations_file, hpo_network, alt2prim):
            for term_id in record['terms']:
                custom_annos.setdefault(term_id, []).append(record['record_id'])

    # make ages distributions
    ages = None
    if ages_distribution_file is not None:
        try:
            ages = make_age_distributions(ages_distribution_file)
            logger.info(
                f'Adding custom phenotype age distributions to HPO nodes from file: {ages_distribution_file}'
            )
        except (FileNotFoundError, PermissionError) as e:
            logger.critical(e)
            logger.critical(
                f'Specified phenotype ages file could not be loaded or does not exist: {e}'
            )
            sys.exit(1)

    for node_id, data in hpo_network.nodes(data=True):
        # annotate with information content value
        hpo_network.nodes[node_id]['ic'] = calculate_information_content(
            node_id,
            hpo_network,
            phenotype_to_diseases,
            num_diseases_annotated,
            custom_annos,
        )
        # annotate with phenotype age distribution
        hpo_network.nodes[node_id]['disease_weights'] = {}

        if ages is not None and node_id in ages.index:
            hpo_network.nodes[node_id]['age_dist'] = ages.loc[node_id]['age_dist']

        # add the disease_frequency weights as attributes to the node
        if phenotype_disease_frequencies is not None:
            if node_id in phenotype_disease_frequencies:
                # ensure the nested weights dict exists before writing into it
                weights = hpo_network.nodes[node_id].setdefault('weights', {})
                weights.setdefault('disease_frequency', {})
                for disease_id, frequency in phenotype_disease_frequencies[node_id].items():
                    weights['disease_frequency'][disease_id] = frequency

        # annotate with depth value
        # hard-coding origin node for now
        origin = 'HP:0000001'
        hpo_network.nodes[node_id]['depth'] = nx.shortest_path_length(
            hpo_network,
            node_id,
            origin
        )

        # clean synonyms: OBO synonym tags are quoted strings; extract the quoted text
        try:
            synonyms = data['synonym']
            hpo_network.nodes[node_id]['synonyms'] = re.findall(
                r'"(.*?)"', ','.join(synonyms))
        except KeyError:
            # no synonym tags on this node
            pass

    return hpo_network
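A hedged sketch of calling annotate directly; in phenopy these inputs are normally prepared by the network-loading code, so the variables below stand in for objects built elsewhere:

hpo_network = annotate(
    hpo_network,                 # graph from load() above
    phenotype_to_diseases,       # dict: HPO id -> diseases annotated with it
    num_diseases_annotated,      # int: total number of annotated diseases
    alt2prim,                    # dict: alternate HPO id -> primary HPO id
    ages_distribution_file=None,
)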
Code Example #11
import sys
from configparser import NoOptionError, NoSectionError
from multiprocessing import Manager, Pool

import pandas as pd

# `config`, `logger`, `load_p2g`, `remove_parents`, `read_records_file`, `Scorer`,
# and `_load_hpo_network` come from the phenopy package (not shown in this excerpt).


def score(query_hpo_file, records_file=None, query_name='SAMPLE', obo_file=None, pheno2genes_file=None, threads=1,
          agg_score='BMA', no_parents=False, custom_annotations_file=None, output_file=None):
    """
    Scores a case's HPO terms against the HPO terms associated with all genes.

    :param query_hpo_file: File with case HPO terms, one per line.
    :param records_file: One record per line, tab delimited. First column record unique identifier, second column
        pipe separated list of HPO identifier (HP:0000001).
    :param query_name: Unique identifier for the query file.
    :param obo_file: OBO file from https://hpo.jax.org/app/download/ontology.
    :param pheno2genes_file: Phenotypes to genes from https://hpo.jax.org/app/download/annotation.
    :param threads: Number of parallel processes to use.
    :param agg_score: The aggregation method to use for summarizing the similarity matrix between two term sets
        Must be one of {'BMA', 'maximum'}
    :param no_parents: If provided, scoring is done by only using the most informative nodes. All parent nodes are removed.
    :param custom_annotations_file: A custom entity-to-phenotype annotation file in the same format as tests/data/test.score-product.txt
    :param output_file: filepath where to store the results.
    """

    if agg_score not in {'BMA', 'maximum', }:
        logger.critical(
            'agg_score must be one of {BMA, maximum}.')
        sys.exit(1)

    if obo_file is None:
        try:
            obo_file = config.get('hpo', 'obo_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO OBO file provided and no "hpo:obo_file" found in the configuration file.')
            sys.exit(1)

    if pheno2genes_file is None:
        try:
            pheno2genes_file = config.get('hpo', 'pheno2genes_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO pheno2genes_file file provided and no "hpo:pheno2genes_file" found in the configuration file.'
            )
            sys.exit(1)

    try:
        with open(query_hpo_file, 'r') as case_fh:
            case_hpo = case_fh.read().splitlines()
    except (FileNotFoundError, PermissionError) as e:
        logger.critical(e)
        sys.exit(1)

    # load phenotypes to genes associations
    terms_to_genes, genes_to_terms, annotations_count = load_p2g(
        pheno2genes_file, logger=logger)

    # load hpo network
    hpo_network = _load_hpo_network(
        obo_file, terms_to_genes, annotations_count, custom_annotations_file)

    # create an instance of the Scorer class
    scorer = Scorer(hpo_network)

    # multiprocessing objects
    manager = Manager()
    lock = manager.Lock()

    if no_parents is True:
        case_hpo = remove_parents(case_hpo, hpo_network)

    if records_file:
        # score and output case hpo terms against all genes associated set of hpo terms
        logger.info(
            f'Scoring HPO terms from file: {query_hpo_file} against entities in: {records_file}')

        records = read_records_file(records_file, no_parents, hpo_network, logger=logger)

        # include the case itself so it appears in the output
        # (records is expected to be a mapping of record id -> HPO terms here)
        records[query_name] = case_hpo
        if not output_file:
            sys.stdout.write('\t'.join(['#query', 'entity_id', 'score']))
            sys.stdout.write('\n')
            with Pool(threads) as p:
                p.starmap(scorer.score_pairs, [(records, [
                          (query_name, record) for record in records], lock, agg_score, i, threads) for i in range(threads)])
        else:
            with Pool(threads) as p:
                scored_results = p.starmap(scorer.score_pairs, [(records, [(query_name, record) for record in records],
                                                                 lock, agg_score, i, threads, False) for i in range(threads)])
            scored_results = [item for sublist in scored_results for item in sublist]
            scored_results_df = pd.DataFrame(data=scored_results, columns='#query,entity_id,score'.split(','))
            scored_results_df = scored_results_df.sort_values(by='score', ascending=False)
            scored_results_df.to_csv(output_file, sep='\t', index=False)
            logger.info('Scoring completed')
            logger.info(f'Wrote results to file: {output_file}')

    else:
        # score and output case hpo terms against all genes associated set of hpo terms
        logger.info(f'Scoring case HPO terms from file: {query_hpo_file}')

        # add the case terms to the genes_to_terms dict
        genes_to_terms[query_name] = case_hpo
        if not output_file:
            sys.stdout.write('\t'.join(['#query', 'gene', 'score']))
            sys.stdout.write('\n')
            # iterate over each cross-product and score the pair of records
            with Pool(threads) as p:
                p.starmap(scorer.score_pairs, [(genes_to_terms, [
                          (query_name, gene) for gene in genes_to_terms], lock, agg_score, i, threads) for i in range(threads)])
        else:

            with Pool(threads) as p:
                scored_results = p.starmap(scorer.score_pairs, [(genes_to_terms,
                                     [(query_name, gene) for gene in genes_to_terms], lock, agg_score, i, threads, False)
                                                                for i in range(threads)])
            scored_results = [item for sublist in scored_results for item in sublist]
            scored_results_df = pd.DataFrame(data=scored_results, columns='#query,gene,score'.split(','))
            scored_results_df = scored_results_df.sort_values(by='score', ascending=False)
            scored_results_df.to_csv(output_file, sep='\t', index=False)
            logger.info('Scoring completed')
            logger.info(f'Wrote results to file: {output_file}')
Code Example #12
import itertools
import sys
from configparser import NoOptionError, NoSectionError
from multiprocessing import Manager, Pool

# `config`, `logger`, `load_p2g`, `read_records_file`, `Scorer`, and `_load_hpo_network`
# come from the phenopy package (not shown in this excerpt).


def score_product(records_file, obo_file=None, pheno2genes_file=None, threads=1, agg_score='BMA', no_parents=False,
                  custom_annotations_file=None):
    """
    Scores the cartesian product of HPO terms from a list of unique records (cases, genes, diseases, etc).

    :param records_file: One record per line, tab delimited. First column record unique identifier, second column
        pipe separated list of HPO identifier (HP:0000001).
    :param obo_file: OBO file from https://hpo.jax.org/app/download/ontology.
    :param pheno2genes_file: Phenotypes to genes from https://hpo.jax.org/app/download/annotation.
    :param threads: Multiprocessing threads to use [default: 1].
    :param agg_score: The aggregation method to use for summarizing the similarity matrix between two term sets
        Must be one of {'BMA', 'maximum'}
    :param no_parents: If provided, scoring is done by only using the most informative nodes. All parent nodes are removed.
    :param custom_annotations_file: A custom entity-to-phenotype annotation file in the same format as tests/data/test.score-product.txt
    """
    if agg_score not in {'BMA', 'maximum', }:
        logger.critical(
            'agg_score must be one of {BMA, maximum}.')
        sys.exit(1)

    if obo_file is None:
        try:
            obo_file = config.get('hpo', 'obo_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO OBO file provided and no "hpo:obo_file" found in the configuration file.')
            sys.exit(1)

    if pheno2genes_file is None:
        try:
            pheno2genes_file = config.get('hpo', 'pheno2genes_file')
        except (NoSectionError, NoOptionError):
            logger.critical(
                'No HPO pheno2genes_file file provided and no "hpo:pheno2genes_file" found in the configuration file.'
            )
            sys.exit(1)

    # load phenotypes to genes associations
    terms_to_genes, _, annotations_count = load_p2g(
        pheno2genes_file, logger=logger)

    # load hpo network
    hpo_network = _load_hpo_network(
        obo_file, terms_to_genes, annotations_count, custom_annotations_file)

    # read_records_file reports missing-file errors itself
    records = read_records_file(records_file, no_parents, hpo_network, logger=logger)

    logger.info(f'Scoring product of records from file: {records_file}')

    # create an instance of the Scorer class
    scorer = Scorer(hpo_network)

    # create records product generator
    records_product = itertools.product(records.keys(), repeat=2)

    # iterate over each cross-product and score the pair of records
    manager = Manager()
    lock = manager.Lock()
    with Pool(threads) as p:
        p.starmap(scorer.score_pairs, [(records, records_product,
                                        lock, agg_score, i, threads) for i in range(threads)])
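A usage sketch (the records file follows the format read by read_records_file above; scored pairs are emitted by score_pairs in each worker):

score_product('records.txt', threads=4, agg_score='BMA')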