Esempio n. 1
0
    def test_quote(self):
        """Check the rendering produced by ``ensure_quotes`` for three sample inputs."""
        # Each pair is (raw input, rendering the test expects).
        cases = (
            ("word1 word2", '"word1 word2"'),  # input containing a space
            ("word1", 'word1'),  # plain alphanumeric input
            ("word1$#", '"word1$#"'),  # input containing symbols
        )
        for raw, expected in cases:
            self.assertEqual(expected, ensure_quotes(raw))
Esempio n. 2
0
 def test_str_has_both(self):
     """Check the BEL string of an abundance built from a namespace and an identifier."""
     namespace = n()
     identifier = n()
     node = abundance(namespace=namespace, identifier=identifier)

     # The expected term is rendered first, then compared against the node's own rendering.
     expected = 'a({namespace}:{identifier})'.format(
         namespace=namespace,
         identifier=ensure_quotes(identifier),
     )
     self.assertEqual(expected, node.as_bel())
Esempio n. 3
0
 def test_str_has_name(self):
     """Check the BEL string of an abundance built from a namespace and a name."""
     namespace = n()
     name = n()
     node = abundance(namespace=namespace, name=name)

     # The expected term is rendered first, then compared against the node's own rendering.
     expected = 'a({namespace}:{name})'.format(
         namespace=namespace,
         name=ensure_quotes(name),
     )
     self.assertEqual(expected, node.as_bel())
Esempio n. 4
0
    def _write_hierarchy_body(self, file=None):
        """Print the citation, the evidence, and one ``IS_A`` statement per hierarchy pair.

        :param file: A writable file or file-like handed to :func:`print`; ``None`` lets print use its default
        """
        citation_line = 'SET Citation = {{"{}","{}"}}'.format(CITATION_TYPE_URL, self.version_iri)
        print(citation_line, file=file)

        # The trailing newline in the literal leaves a blank line before the statements.
        evidence_line = 'SET Evidence = "Automatically generated hierarchy from {}"\n'.format(self.version_iri)
        print(evidence_line, file=file)

        statement_template = '{fn}({keyword}:{child}) {relation} {fn}({keyword}:{parent})'
        for parent, child in self._get_hierarchy():
            statement = statement_template.format(
                fn=self.bel_function,
                keyword=self.preferred_prefix,
                relation=IS_A,
                parent=ensure_quotes(parent),
                child=ensure_quotes(child),
            )
            print(statement, file=file)
Esempio n. 5
0
def write_hgnc_gene_families(file, df=None):
    """Write the HGNC gene family hierarchy as a BEL script.

    The document header is emitted through ``write_boilerplate``, followed by the citation, evidence and
    confidence settings, and then one ``IS_A`` statement per row linking the gene to its gene family.

    :param file file: A writable file or file-like
    :param pandas.DataFrame df: A data frame containing the original data source. When ``None``, the result
     of ``get_data()`` is used instead
    """
    if df is None:
        df = get_data()

    write_boilerplate(
        document_name='HGNC Gene Family Definitions',
        authors='Charles Tapley Hoyt',
        contact='*****@*****.**',
        licenses='Creative Commons by 4.0',
        copyright='Copyright (c) 2017 Charles Tapley Hoyt. All Rights Reserved.',
        description="""This BEL document represents the gene families curated by HGNC, describing various functional, structural, and logical classifications""",
        namespace_dict={
            'HGNC': HGNC_HUMAN_GENES,
            'GFAM': HGNC_GENE_FAMILIES,
        },
        namespace_patterns={},
        annotations_dict={'Confidence': CONFIDENCE},
        annotations_patterns={},
        file=file,
    )

    header_lines = (
        'SET Citation = {"PubMed","HGNC","25361968"}',
        'SET Evidence = "HGNC Definitions"',
        'SET Confidence = "Axiomatic"',
    )
    for header_line in header_lines:
        print(header_line, file=file)

    membership_df = df[['Gene family description', 'Approved Symbol']]
    # itertuples() yields (index, family description, approved symbol); the index is not needed.
    for _, family, symbol in membership_df.itertuples():
        family_term = ensure_quotes(family.strip())
        symbol_term = ensure_quotes(symbol.strip())
        statement = 'g(HGNC:{}) {} g(GFAM:{})'.format(symbol_term, IS_A, family_term)
        print(statement, file=file)
Esempio n. 6
0
def write_bel_association(abundance1,
                          namespace1,
                          accession1,
                          abundance2,
                          namespace2,
                          accession2,
                          file=None):
    """Print a BEL association (``--``) between a subject term and an object term.

    Every one of the six components is passed through ``ensure_quotes`` before being formatted.

    :param str abundance1: Abundance of the subject
    :param str namespace1: Namespace of the subject
    :param str accession1: Identifier of the subject
    :param str abundance2: Abundance of the object
    :param str namespace2: Namespace of the object
    :param str accession2: Identifier of the object
    :param file file: A writeable file or file like. Defaults to stdout
    """
    components = (abundance1, namespace1, accession1, abundance2, namespace2, accession2)
    rendered = [ensure_quotes(component) for component in components]
    print('{}({}:{}) -- {}({}:{})'.format(*rendered), file=file)
Esempio n. 7
0
def get_neurommsig_bel(
    df: pd.DataFrame,
    disease: str,
    nift_values: Mapping[str, str],
) -> BELGraph:
    """Generate the NeuroMMSig BEL graph.

    Rows are grouped by pathway and sorted by gene. For each row, the gene is associated with every
    non-blank SNP (literature, GWAS, LD block and clinical) and with every clinical feature that can be
    resolved through ``nift_values``. Each non-blank clinical SNP of the row is additionally associated
    with each resolved clinical feature.

    Clinical features that cannot be resolved, and features whose capitalization had to be corrected,
    are collected and reported through the logger once the graph is built.

    :param df: the NeuroMMSig data frame; it is grouped by ``PATHWAY_COLUMN_NAME``, sorted by
     ``GENE_COLUMN_NAME`` and sliced with ``columns`` (a name taken from the enclosing scope)
    :param disease: the disease name, used in the graph metadata and as the ``MeSHDisease`` annotation
    :param nift_values: a dictionary of lower-cased to normal names in NIFT
    :return: the populated BEL graph
    """
    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    graph = BELGraph(
        name=f'NeuroMMSigDB for {disease}',
        description=f'SNP and Clinical Features for Subgraphs in {disease}',
        authors='Daniel Domingo-Fernández, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        version=time.strftime('%Y%m%d'),
    )

    for _pathway, pathway_df in df.groupby(PATHWAY_COLUMN_NAME):
        sorted_pathway_df = pathway_df.sort_values(GENE_COLUMN_NAME)
        sliced_df = sorted_pathway_df[columns].itertuples()

        for _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps in sliced_df:
            gene = ensure_quotes(gene)

            # Gene -- SNP associations; any SNP collection may be empty/None, and blank entries are skipped.
            all_snps = itt.chain(lit_snps or [], gwas_snps or [], ld_block_snps or [], clinical_snps or [])
            for snp in all_snps:
                if not snp.strip():
                    continue
                graph.add_association(
                    Gene('HGNC', gene),
                    Gene('DBSNP', snp),
                    evidence=CANNED_EVIDENCE,
                    citation=CANNED_CITATION,
                    annotations={
                        'MeSHDisease': disease,
                    },
                )

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                # Features are looked up case-insensitively; unknown ones are only reported.
                if clinical_feature.lower() not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    canonical_feature = nift_values[clinical_feature.lower()]
                    fixed_caps.add((clinical_feature, canonical_feature))
                    clinical_feature = canonical_feature  # fix capitalization

                graph.add_association(
                    Gene('HGNC', gene),
                    Abundance('NIFT', clinical_feature),
                    evidence=CANNED_EVIDENCE,
                    citation=CANNED_CITATION,
                    annotations={
                        'MeSHDisease': disease,
                    },
                )

                for clinical_snp in clinical_snps or []:
                    # Skip blank entries, exactly as the gene -- SNP loop above does, so that no
                    # DBSNP node with an empty name is added to the graph.
                    if not clinical_snp.strip():
                        continue
                    graph.add_association(
                        Gene('DBSNP', clinical_snp),
                        Abundance('NIFT', clinical_feature),
                        evidence=CANNED_EVIDENCE,
                        citation=CANNED_CITATION,
                        annotations={
                            'MeSHDisease': disease,
                        },
                    )

    if missing_features:
        logger.warning('Missing Features in %s', disease)
        for feature in missing_features:
            logger.warning(feature)

    if fixed_caps:
        logger.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            logger.warning('%s -> %s', broken, fixed)

    return graph
Esempio n. 8
0
def write_neurommsig_bel(file, df, disease, nift_values):
    """Write the NeuroMMSigDB excel sheet to BEL.

    The document header is emitted through ``write_boilerplate``. Rows are then grouped by pathway; each
    group is wrapped in ``SET Subgraph`` / ``UNSET Subgraph`` and contains one association per non-blank
    SNP of a gene and one per clinical feature that can be resolved through ``nift_values``.

    Unresolved clinical features and corrected capitalizations are reported through the logger, and only
    when there is something to report.

    :param file: a file or file-like that can be written to
    :param pandas.DataFrame df: the NeuroMMSigDB data; grouped by ``pathway_column`` and sliced with
     ``columns`` (both names are taken from the enclosing scope)
    :param str disease: the disease name, used in the document metadata and the ``MeSHDisease`` annotation
    :param dict nift_values: a dictionary of lowercased to normal names in NIFT
    """
    write_boilerplate(
        document_name='NeuroMMSigDB for {}'.format(disease),
        description='SNP and Clinical Features for Subgraphs in {}'.format(disease),
        authors='Daniel Domingo, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        namespace_dict={
            'NIFT': NIFT_URL,
            'HGNC': FRAUNHOFER_RESOURCES + 'hgnc-human-genes.belns',
        },
        namespace_patterns={
            'dbSNP': DBSNP_PATTERN
        },
        annotations_dict={
            'Subgraph': FRAUNHOFER_RESOURCES + 'subgraph.belanno',
            'MeSHDisease': FRAUNHOFER_RESOURCES + 'mesh-diseases.belanno'
        },
        file=file
    )

    print('SET Citation = {"Other", "NeuroMMSigDB", "http://neurommsig.scai.fraunhofer.de/"}', file=file)
    print('SET Evidence = "Serialized from NeuroMMSigDB"', file=file)
    print('SET MeSHDisease = "{}"\n'.format(disease), file=file)

    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    for pathway, pathway_df in df.groupby(pathway_column):
        print('SET Subgraph = "{}"'.format(pathway), file=file)

        for _, gene, lit_snps, gwas_snps, clinical_features, clinical_snp in pathway_df[columns].itertuples():
            gene = ensure_quotes(gene)

            # A missing SNP collection is represented as None; treat it as empty.
            if lit_snps is None:
                lit_snps = []

            if gwas_snps is None:
                gwas_snps = []

            if clinical_snp is None:
                clinical_snp = []

            for snp in itt.chain(lit_snps, gwas_snps, clinical_snp):
                if not snp.strip():
                    continue
                print('g(HGNC:{}) -- g(dbSNP:{})'.format(gene, snp), file=file)

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue
                # Features are looked up case-insensitively; unknown ones are only reported.
                if clinical_feature.lower() not in nift_values:
                    missing_features.add(clinical_feature)
                    continue
                if clinical_feature not in nift_value_originals:
                    fixed_caps.add((clinical_feature, nift_values[clinical_feature.lower()]))
                    clinical_feature = nift_values[clinical_feature.lower()]  # fix capitalization
                print('g(HGNC:{}) -- a(NIFT:{})'.format(gene, ensure_quotes(clinical_feature)), file=file)

        print('UNSET Subgraph\n', file=file)

    print('UNSET MeSHDisease', file=file)
    print('UNSET Evidence', file=file)
    print('UNSET Citation', file=file)

    # Only emit a warning header when there is something to list under it; an unconditional header
    # would report "missing features" for a perfectly clean input.
    if missing_features:
        log.warning('Missing Features in %s', disease)
        for feature in missing_features:
            log.warning(feature)

    if fixed_caps:
        log.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            log.warning('%s -> %s', broken, fixed)
Esempio n. 9
0
def write_neurommsig_bel(file, df, disease, nift_values):
    """Write the NeuroMMSigDB excel sheet to BEL.

    The document header is emitted through ``write_neurommsig_biolerplate``. Rows are then grouped by
    pathway and sorted by gene; each group is wrapped in ``SET Subgraph`` / ``UNSET Subgraph``. For each
    row, the gene is associated with every non-blank SNP and with every clinical feature that can be
    resolved through ``nift_values``, and each non-blank clinical SNP is associated with each resolved
    clinical feature.

    Unresolved clinical features and corrected capitalizations are reported through the logger.

    :param file: a file or file-like that can be written to
    :param pandas.DataFrame df: the NeuroMMSigDB data; grouped by ``pathway_column``, sorted by
     ``genes_column`` and sliced with ``columns`` (all three names are taken from the enclosing scope)
    :param str disease: the disease name, passed to the boilerplate writer and used in log messages
    :param dict nift_values: a dictionary of lowercased to normal names in NIFT
    """
    write_neurommsig_biolerplate(disease, file)

    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    for pathway, pathway_df in df.groupby(pathway_column):
        print('SET Subgraph = "{}"'.format(pathway), file=file)

        sorted_pathway_df = pathway_df.sort_values(genes_column)
        sliced_df = sorted_pathway_df[columns].itertuples()

        for _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps in sliced_df:
            gene = ensure_quotes(gene)

            # Gene -- SNP associations; any SNP collection may be empty/None, and blank entries are skipped.
            for snp in itt.chain(lit_snps or [], gwas_snps or [], ld_block_snps or [], clinical_snps or []):
                if not snp.strip():
                    continue
                print('g(HGNC:{}) -- g(dbSNP:{})'.format(gene, snp), file=file)

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                # Features are looked up case-insensitively; unknown ones are only reported.
                if clinical_feature.lower() not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    fixed_caps.add((clinical_feature, nift_values[clinical_feature.lower()]))
                    clinical_feature = nift_values[clinical_feature.lower()]  # fix capitalization

                quoted_feature = ensure_quotes(clinical_feature)
                print('g(HGNC:{}) -- a(NIFT:{})'.format(gene, quoted_feature), file=file)

                for clinical_snp in clinical_snps or []:
                    # Skip blank entries, exactly as the gene -- SNP loop above does, so that no
                    # term with an empty dbSNP value is written.
                    if not clinical_snp.strip():
                        continue
                    # The g(...) term must be closed before the relation; the previous template
                    # omitted the closing parenthesis and emitted an unbalanced statement.
                    print('g(dbSNP:{}) -- a(NIFT:{})'.format(clinical_snp, quoted_feature), file=file)

        print('UNSET Subgraph\n', file=file)

    print('UNSET MeSHDisease', file=file)
    print('UNSET Evidence', file=file)
    print('UNSET Citation', file=file)

    if missing_features:
        log.warning('Missing Features in %s', disease)
        for feature in missing_features:
            log.warning(feature)

    if fixed_caps:
        log.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            log.warning('%s -> %s', broken, fixed)