def test_quote(self):
    """Check which values ``ensure_quotes`` wraps in double quotes.

    A value containing a space or the characters ``$#`` is expected to come
    back quoted, while a plain word is expected to come back unchanged.
    """
    expectations = (
        ('"word1 word2"', "word1 word2"),
        ('word1', "word1"),
        ('"word1$#"', "word1$#"),
    )
    for expected, raw in expectations:
        self.assertEqual(expected, ensure_quotes(raw))
def test_str_has_both(self):
    """Check the BEL string of an abundance built from a namespace and an identifier."""
    namespace = n()
    identifier = n()
    node = abundance(namespace=namespace, identifier=identifier)

    expected = 'a({namespace}:{identifier})'.format(
        namespace=namespace,
        identifier=ensure_quotes(identifier),
    )
    self.assertEqual(expected, node.as_bel())
def test_str_has_name(self):
    """Check the BEL string of an abundance built from a namespace and a name."""
    namespace = n()
    name = n()
    node = abundance(namespace=namespace, name=name)

    expected = 'a({namespace}:{name})'.format(
        namespace=namespace,
        name=ensure_quotes(name),
    )
    self.assertEqual(expected, node.as_bel())
def _write_hierarchy_body(self, file=None):
    """Print the citation, the evidence, and one ``IS_A`` statement per hierarchy pair.

    The citation and evidence lines are built from ``self.version_iri``. Each
    ``(parent, child)`` pair yielded by ``self._get_hierarchy()`` becomes one
    statement using ``self.bel_function`` and ``self.preferred_prefix``.

    :param file file: A writable file or file-like, passed straight to ``print``
    """
    citation_line = 'SET Citation = {{"{}","{}"}}'.format(CITATION_TYPE_URL, self.version_iri)
    print(citation_line, file=file)

    evidence_line = 'SET Evidence = "Automatically generated hierarchy from {}"\n'.format(self.version_iri)
    print(evidence_line, file=file)

    statement_template = '{fn}({keyword}:{child}) {relation} {fn}({keyword}:{parent})'

    for parent, child in self._get_hierarchy():
        # Attributes are read per pair, so an empty hierarchy never touches them.
        statement = statement_template.format(
            fn=self.bel_function,
            keyword=self.preferred_prefix,
            relation=IS_A,
            parent=ensure_quotes(parent),
            child=ensure_quotes(child),
        )
        print(statement, file=file)
def write_hgnc_gene_families(file, df=None):
    """Write the HGNC gene family hierarchy to a BEL script.

    The document boilerplate is written first, then fixed citation, evidence,
    and confidence lines, and finally one ``IS_A`` statement per row linking
    the gene symbol to its gene family.

    :param file file: A writable file or file-like
    :param pandas.DataFrame df: A data frame containing the original data source.
     When ``None``, it is obtained from ``get_data()``.
    """
    if df is None:
        df = get_data()

    write_boilerplate(
        document_name='HGNC Gene Family Definitions',
        authors='Charles Tapley Hoyt',
        contact='*****@*****.**',
        licenses='Creative Commons by 4.0',
        copyright='Copyright (c) 2017 Charles Tapley Hoyt. All Rights Reserved.',
        description="""This BEL document represents the gene families curated by HGNC, describing various functional, structural, and logical classifications""",
        namespace_dict={
            'HGNC': HGNC_HUMAN_GENES,
            'GFAM': HGNC_GENE_FAMILIES,
        },
        namespace_patterns={},
        annotations_dict={'Confidence': CONFIDENCE},
        annotations_patterns={},
        file=file,
    )

    header_lines = (
        'SET Citation = {"PubMed","HGNC","25361968"}',
        'SET Evidence = "HGNC Definitions"',
        'SET Confidence = "Axiomatic"',
    )
    for header_line in header_lines:
        print(header_line, file=file)

    membership = df[['Gene family description', 'Approved Symbol']]
    for _, family, symbol in membership.itertuples():
        family_term = ensure_quotes(family.strip())
        symbol_term = ensure_quotes(symbol.strip())
        print('g(HGNC:{}) {} g(GFAM:{})'.format(symbol_term, IS_A, family_term), file=file)
def write_bel_association(abundance1, namespace1, accession1, abundance2, namespace2, accession2, file=None):
    """Print a BEL association between a subject term and an object term.

    Every one of the six values is passed through ``ensure_quotes`` before it
    is placed in the statement.

    :param str abundance1: Abundance of the subject
    :param str namespace1: Namespace of the subject
    :param str accession1: Identifier of the subject
    :param str abundance2: Abundance of the object
    :param str namespace2: Namespace of the object
    :param str accession2: Identifier of the object
    :param file file: A writeable file or file like. Defaults to stdout
    """
    raw_parts = (
        abundance1, namespace1, accession1,
        abundance2, namespace2, accession2,
    )
    quoted_parts = [ensure_quotes(part) for part in raw_parts]
    print('{}({}:{}) -- {}({}:{})'.format(*quoted_parts), file=file)
def get_neurommsig_bel(
    df: pd.DataFrame,
    disease: str,
    nift_values: Mapping[str, str],
) -> BELGraph:
    """Generate the NeuroMMSig BEL graph.

    Rows are grouped by ``PATHWAY_COLUMN_NAME``, sorted by ``GENE_COLUMN_NAME``
    and projected onto the module-level ``columns``. For each row, the gene is
    associated with every non-blank SNP and with every clinical feature found
    in ``nift_values``; each clinical SNP is also associated with each such
    clinical feature. Unknown features and capitalization fixes are logged as
    warnings after the graph has been built.

    :param df: the NeuroMMSig data frame
    :param disease: the disease name, used in the graph metadata and as the
     ``MeSHDisease`` annotation of every association
    :param nift_values: a dictionary of lower-cased to normal names in NIFT
    :return: the populated BEL graph
    """
    unknown_features = set()
    recapitalized = set()
    canonical_names = set(nift_values.values())

    graph = BELGraph(
        name=f'NeuroMMSigDB for {disease}',
        description=f'SNP and Clinical Features for Subgraphs in {disease}',
        authors='Daniel Domingo-Fernández, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        version=time.strftime('%Y%m%d'),
    )

    def _associate(source, target):
        # Every association shares the canned provenance; the annotations
        # dictionary is rebuilt per call so no two edges share one object.
        graph.add_association(
            source,
            target,
            evidence=CANNED_EVIDENCE,
            citation=CANNED_CITATION,
            annotations={
                'MeSHDisease': disease,
            },
        )

    for _pathway, pathway_df in df.groupby(PATHWAY_COLUMN_NAME):
        rows = pathway_df.sort_values(GENE_COLUMN_NAME)[columns].itertuples()

        for row in rows:
            _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps = row
            gene = ensure_quotes(gene)

            all_snps = itt.chain(
                lit_snps or [],
                gwas_snps or [],
                ld_block_snps or [],
                clinical_snps or [],
            )
            for snp in all_snps:
                if snp.strip():
                    _associate(Gene('HGNC', gene), Gene('DBSNP', snp))

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                lowered = clinical_feature.lower()
                if lowered not in nift_values:
                    unknown_features.add(clinical_feature)
                    continue

                if clinical_feature not in canonical_names:
                    canonical = nift_values[lowered]
                    recapitalized.add((clinical_feature, canonical))
                    clinical_feature = canonical  # fix capitalization

                _associate(Gene('HGNC', gene), Abundance('NIFT', clinical_feature))

                for clinical_snp in clinical_snps or []:
                    _associate(Gene('DBSNP', clinical_snp), Abundance('NIFT', clinical_feature))

    if unknown_features:
        logger.warning('Missing Features in %s', disease)
        for feature in unknown_features:
            logger.warning(feature)

    if recapitalized:
        logger.warning('Fixed capitalization')
        for broken, fixed in recapitalized:
            logger.warning('%s -> %s', broken, fixed)

    return graph
def write_neurommsig_bel(file, df, disease, nift_values):
    """Write the NeuroMMSigDB excel sheet to BEL.

    The document boilerplate and the citation, evidence, and disease
    annotations are written first. Rows are then grouped by the module-level
    ``pathway_column``; each group is wrapped in ``SET Subgraph`` /
    ``UNSET Subgraph`` and contributes one statement per non-blank SNP and per
    clinical feature found in ``nift_values``.

    Warnings about unknown clinical features and about capitalization fixes
    are only logged when there is something to report.

    :param file: a file or file-like that can be written to
    :param pandas.DataFrame df: the NeuroMMSigDB data frame
    :param str disease: the disease name, used in the document metadata and as the ``MeSHDisease`` annotation
    :param dict nift_values: a dictionary of lowercased to normal names in NIFT
    """
    write_boilerplate(
        document_name='NeuroMMSigDB for {}'.format(disease),
        description='SNP and Clinical Features for Subgraphs in {}'.format(disease),
        authors='Daniel Domingo, Charles Tapley Hoyt, Mufassra Naz, Aybuge Altay, Anandhi Iyappan',
        contact='*****@*****.**',
        namespace_dict={
            'NIFT': NIFT_URL,
            'HGNC': FRAUNHOFER_RESOURCES + 'hgnc-human-genes.belns',
        },
        namespace_patterns={
            'dbSNP': DBSNP_PATTERN
        },
        annotations_dict={
            'Subgraph': FRAUNHOFER_RESOURCES + 'subgraph.belanno',
            'MeSHDisease': FRAUNHOFER_RESOURCES + 'mesh-diseases.belanno'
        },
        file=file
    )

    print('SET Citation = {"Other", "NeuroMMSigDB", "http://neurommsig.scai.fraunhofer.de/"}', file=file)
    print('SET Evidence = "Serialized from NeuroMMSigDB"', file=file)
    print('SET MeSHDisease = "{}"\n'.format(disease), file=file)

    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    for pathway, pathway_df in df.groupby(pathway_column):
        print('SET Subgraph = "{}"'.format(pathway), file=file)

        for _, gene, lit_snps, gwas_snps, clinical_features, clinical_snp in pathway_df[columns].itertuples():
            gene = ensure_quotes(gene)

            # A missing cell arrives as None; treat it as "no SNPs".
            if lit_snps is None:
                lit_snps = []

            if gwas_snps is None:
                gwas_snps = []

            if clinical_snp is None:
                clinical_snp = []

            for snp in itt.chain(lit_snps, gwas_snps, clinical_snp):
                if not snp.strip():
                    continue
                print('g(HGNC:{}) -- g(dbSNP:{})'.format(gene, snp), file=file)

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                if clinical_feature.lower() not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    fixed_caps.add((clinical_feature, nift_values[clinical_feature.lower()]))
                    clinical_feature = nift_values[clinical_feature.lower()]  # fix capitalization

                print('g(HGNC:{}) -- a(NIFT:{})'.format(gene, ensure_quotes(clinical_feature)), file=file)

        print('UNSET Subgraph\n', file=file)

    print('UNSET MeSHDisease', file=file)
    print('UNSET Evidence', file=file)
    print('UNSET Citation', file=file)

    # Only emit a warning header when there are entries to list under it;
    # an unconditional header reads as a problem report even on clean data.
    if missing_features:
        log.warning('Missing Features in %s', disease)
        for feature in missing_features:
            log.warning(feature)

    if fixed_caps:
        log.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            log.warning('%s -> %s', broken, fixed)
def write_neurommsig_bel(file, df, disease, nift_values):
    """Write the NeuroMMSigDB excel sheet to BEL.

    After the boilerplate, rows are grouped by the module-level
    ``pathway_column``, sorted by ``genes_column`` and projected onto
    ``columns``. Each group is wrapped in ``SET Subgraph`` / ``UNSET Subgraph``
    and contributes:

    - one gene-to-SNP statement per non-blank SNP,
    - one gene-to-feature statement per clinical feature found in ``nift_values``,
    - one SNP-to-feature statement per non-blank clinical SNP for each such feature.

    Warnings about unknown clinical features and about capitalization fixes
    are logged at the end when there is something to report.

    :param file: a file or file-like that can be written to
    :param pandas.DataFrame df: the NeuroMMSigDB data frame
    :param str disease: the disease name, passed to the boilerplate writer and used in log messages
    :param dict nift_values: a dictionary of lowercased to normal names in NIFT
    """
    write_neurommsig_biolerplate(disease, file)

    missing_features = set()
    fixed_caps = set()
    nift_value_originals = set(nift_values.values())

    for pathway, pathway_df in df.groupby(pathway_column):
        print('SET Subgraph = "{}"'.format(pathway), file=file)

        sorted_pathway_df = pathway_df.sort_values(genes_column)
        sliced_df = sorted_pathway_df[columns].itertuples()

        for _, gene, _pubmeds, lit_snps, gwas_snps, ld_block_snps, clinical_features, clinical_snps in sliced_df:
            gene = ensure_quotes(gene)

            for snp in itt.chain(lit_snps or [], gwas_snps or [], ld_block_snps or [], clinical_snps or []):
                if not snp.strip():
                    continue
                print('g(HGNC:{}) -- g(dbSNP:{})'.format(gene, snp), file=file)

            for clinical_feature in clinical_features or []:
                if not clinical_feature.strip():
                    continue

                if clinical_feature.lower() not in nift_values:
                    missing_features.add(clinical_feature)
                    continue

                if clinical_feature not in nift_value_originals:
                    fixed_caps.add((clinical_feature, nift_values[clinical_feature.lower()]))
                    clinical_feature = nift_values[clinical_feature.lower()]  # fix capitalization

                quoted_feature = ensure_quotes(clinical_feature)
                print('g(HGNC:{}) -- a(NIFT:{})'.format(gene, quoted_feature), file=file)

                for clinical_snp in clinical_snps or []:
                    # Skip blank entries, as the gene-to-SNP loop above does,
                    # so no empty "g(dbSNP:)" term is ever written.
                    if not clinical_snp.strip():
                        continue
                    # The subject term must be closed before the relation;
                    # the previous template lacked the ")" after the SNP.
                    print('g(dbSNP:{}) -- a(NIFT:{})'.format(clinical_snp, quoted_feature), file=file)

        print('UNSET Subgraph\n', file=file)

    print('UNSET MeSHDisease', file=file)
    print('UNSET Evidence', file=file)
    print('UNSET Citation', file=file)

    if missing_features:
        log.warning('Missing Features in %s', disease)
        for feature in missing_features:
            log.warning(feature)

    if fixed_caps:
        log.warning('Fixed capitalization')
        for broken, fixed in fixed_caps:
            log.warning('%s -> %s', broken, fixed)