Exemple #1
0
def get_names_including_errors_by_namespace(graph: BELGraph, namespace: str) -> Set[str]:
    """Collect every name used in the graph for a namespace, whether it is valid or erroneous.

    Combines the result of :func:`pybel.struct.summary.get_names_by_namespace` with the result of
    :func:`get_incorrect_names_by_namespace` for the same namespace.

    :param graph: A BEL graph
    :param namespace: The namespace to filter by
    :return: The union of the correct and the incorrect names from the given namespace in the graph
    """
    valid_names = get_names_by_namespace(graph, namespace)
    erroneous_names = get_incorrect_names_by_namespace(graph, namespace)
    return valid_names | erroneous_names
Exemple #2
0
def get_names_including_errors_by_namespace(graph, namespace):
    """Get the names, including the erroneous ones, that the graph uses from the given namespace.

    The names found in the namespace and the erroneous names attributed to the same namespace are
    looked up separately and then combined.

    :param pybel.BELGraph graph: A BEL graph
    :param str namespace: The namespace to filter by
    :return: The union of the correct and the incorrect names from the given namespace in the graph
    :rtype: set[str]
    """
    valid_names = get_names_by_namespace(graph, namespace)
    erroneous_names = get_incorrect_names_by_namespace(graph, namespace)
    return valid_names | erroneous_names
Exemple #3
0
    def test_names_sialic(self):
        """Check the HGNC names found in the sialic acid graph, and how often each one occurs."""
        expected_counts = dict(
            CD33=3,  # once as reference, once in complex, and once as variant
            TYROBP=1,
            SYK=1,
            PTPN6=1,
            PTPN11=1,
            TREM2=1,
        )

        found_names = get_names_by_namespace(sialic_acid_graph, 'HGNC')
        self.assertEqual(set(expected_counts), found_names)

        # The counter is converted to a plain dict before comparing.
        found_counts = dict(count_names_by_namespace(sialic_acid_graph, 'HGNC'))
        self.assertEqual(expected_counts, found_counts)
Exemple #4
0
    def test_summarize_sialic(self):
        """Check the namespaces and names summarized from the sialic acid graph, along with their counts."""
        expected_namespace_counts = {
            'hgnc': 8,
            'chebi': 2,
            'bel': 3,
        }
        expected_namespaces = set(expected_namespace_counts)

        self.assertEqual(expected_namespaces, get_namespaces(sialic_acid_graph))
        self.assertEqual(Counter(expected_namespace_counts), count_namespaces(sialic_acid_graph))

        expected_hgnc_counts = {
            'CD33': 3,  # once as reference, once in complex, and once as variant
            'TYROBP': 1,
            'SYK': 1,
            'PTPN6': 1,
            'PTPN11': 1,
            'TREM2': 1,
        }
        expected_chebi_counts = {'sialic acid': 2}

        # A tuple of pairs keeps the check order fixed: hgnc first, then chebi.
        expected_by_namespace = (
            ('hgnc', expected_hgnc_counts),
            ('chebi', expected_chebi_counts),
        )

        names = get_names(sialic_acid_graph)
        self.assertEqual(expected_namespaces, set(names))

        # Names as grouped by the all-namespaces summary.
        for namespace, expected_counts in expected_by_namespace:
            self.assertEqual(set(expected_counts), names[namespace])

        # Names as returned by the single-namespace lookup.
        for namespace, expected_counts in expected_by_namespace:
            found_names = get_names_by_namespace(sialic_acid_graph, namespace)
            self.assertEqual(set(expected_counts), found_names)

        # Occurrence counts, converted to plain dicts before comparing.
        for namespace, expected_counts in expected_by_namespace:
            found_counts = dict(count_names_by_namespace(sialic_acid_graph, namespace))
            self.assertEqual(expected_counts, found_counts)
Exemple #5
0
    def test_names_sialic(self):
        """Check the HGNC names found in the sialic acid graph, and how often each one occurs."""
        expected_counts = dict(
            CD33=2,
            TYROBP=1,
            SYK=1,
            PTPN6=1,
            PTPN11=1,
            TREM2=1,
        )

        found_names = get_names_by_namespace(sialic_acid_graph, 'HGNC')
        self.assertEqual(set(expected_counts), found_names)

        # The counting result is compared as returned, without conversion.
        found_counts = count_names_by_namespace(sialic_acid_graph, 'HGNC')
        self.assertEqual(expected_counts, found_counts)
Exemple #6
0
    def test_names_sialic(self):
        """Check the HGNC names found in the sialic acid graph, and how often each one occurs."""
        expected_counts = dict(
            CD33=2,
            TYROBP=1,
            SYK=1,
            PTPN6=1,
            PTPN11=1,
            TREM2=1,
        )

        found_names = get_names_by_namespace(sialic_acid_graph, 'HGNC')
        self.assertEqual(set(expected_counts), found_names)

        # The expectation is wrapped in a Counter before comparing with the counting result.
        found_counts = count_names_by_namespace(sialic_acid_graph, 'HGNC')
        self.assertEqual(Counter(expected_counts), found_counts)
Exemple #7
0
def export_namespace(
    graph: BELGraph,
    namespace: str,
    directory: Optional[str] = None,
    cacheable: bool = False,
) -> None:
    """Export all names and missing names from the given namespace to its own BELNS file in the given directory.

    Could be useful during quick and dirty curation, where planned namespace building is not a priority.

    The exported values are the union of the correct names, the incorrect names, and the names used with
    the namespace while it was undefined. The file is named ``<namespace>.belns``.

    :param graph: A BEL graph
    :param namespace: The namespace to process
    :param directory: The path to the directory where to output the namespace. Defaults to the current working
     directory returned by :func:`os.getcwd`
    :param cacheable: Should the namespace be cacheable? Defaults to ``False`` because, in general, this operation
     will probably be used for evil, and users won't want to reload their entire cache after each iteration of curation.
    """
    if directory is None:
        directory = os.getcwd()
    path = os.path.join(directory, f'{namespace}.belns')

    logger.info('Outputting to %s', path)
    right_names = get_names_by_namespace(graph, namespace)
    logger.info('Graph has %d correct names in %s', len(right_names), namespace)
    wrong_names = get_incorrect_names_by_namespace(graph, namespace)
    logger.info('Graph has %d incorrect names in %s', len(wrong_names), namespace)
    undefined_ns_names = get_undefined_namespace_names(graph, namespace)
    logger.info('Graph has %d names in missing namespace %s', len(undefined_ns_names), namespace)

    names = right_names | wrong_names | undefined_ns_names

    if not names:
        # An empty namespace is only warned about; the file is still written below.
        logger.warning('%s is empty', namespace)

    # An explicit encoding keeps the output independent of the platform's locale, so names
    # containing non-ASCII characters are written the same way everywhere.
    with open(path, 'w', encoding='utf-8') as file:
        write_namespace(
            namespace_name=namespace,
            namespace_keyword=namespace,
            namespace_domain='Other',
            author_name=graph.authors,
            author_contact=graph.contact,
            citation_name=graph.name,
            values=names,
            cacheable=cacheable,
            file=file,
        )
    def test_names_fusions(self):
        """Check that both partner names of a protein fusion are found and counted once each."""
        graph = BELGraph()
        graph.namespace_url['HGNC'] = 'http://dummy'

        # Build the fusion from its two HGNC partners, A (5') and B (3').
        partner_a = protein(name='A', namespace='HGNC')
        range_a = fusion_range('p', 1, 15)
        partner_b = protein(name='B', namespace='HGNC')
        range_b = fusion_range('p', 1, 100)
        fusion_node = protein_fusion(
            partner_5p=partner_a,
            range_5p=range_a,
            partner_3p=partner_b,
            range_3p=range_b,
        )
        graph.add_node_from_data(fusion_node)

        expected_counts = dict(A=1, B=1)

        found_names = get_names_by_namespace(graph, 'HGNC')
        self.assertEqual(set(expected_counts), found_names)

        found_counts = count_names_by_namespace(graph, 'HGNC')
        self.assertEqual(expected_counts, found_counts)
 def test_get_names_raise(self):
     """Check that asking for the names of a namespace missing from the graph raises an IndexError."""
     self.assertRaises(IndexError, get_names_by_namespace, sialic_acid_graph, 'NOPE')
Exemple #10
0
 def test_get_names_raise(self):
     """Check that asking for the names of a namespace missing from the graph raises an IndexError."""
     self.assertRaises(IndexError, get_names_by_namespace, sialic_acid_graph, 'NOPE')