예제 #1
0
def load_graphs_synthetic():
    """Load the SYNTHETIC graph dataset for graph classification.

    Reference:
        Feragen, A., Kasenburg, N., Petersen, J., de Bruijne, M., & Borgwardt, K. M. (2013)
        Scalable kernels for graphs with continuous attributes.
        In Neural Information Processing Systems (NIPS) 2013 (pp. 216-224).
        Retrieved from http://papers.nips.cc/paper/5155-scalable-kernels-for-graphs-with-continuous-attributes.pdf

    After loading, the 'label' attribute of every node is overwritten with
    the node's degree rendered as a string.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels' and 'veclabels', both set to True (presumably flags telling
        consumers which kinds of node labels are available -- confirm against
        the callers).
    """
    input_target_url = 'http://www.math.unipd.it/~nnavarin/datasets/SYNTHETICnew.labels'
    input_data_url = 'http://www.math.unipd.it/~nnavarin/datasets/SYNTHETICnew.gspan'

    _target = load_target(input_target_url)
    graphs = list(instance_to_graph(input=input_data_url))

    for graph in graphs:
        # networkx < 2.4 exposes the per-node attribute dicts as ``G.node``;
        # that alias was removed in 2.4, where ``G.nodes[n]`` is the only
        # spelling. Pick whichever the installed version provides.
        node_attrs = graph.node if hasattr(graph, 'node') else graph.nodes
        for n in graph.nodes():
            node_attrs[n]['label'] = str(graph.degree(n))

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('Loaded SYNTHETIC graph dataset for graph classification from:')
    print('Feragen, A., Kasenburg, N., Petersen, J., de Bruijne, M., & Borgwardt, K. M. (2013)')
    print('Scalable kernels for graphs with continuous attributes.')
    print('In Neural Information Processing Systems (NIPS) 2013 (pp. 216–224).')
    return Bunch(graphs=graphs, target=_target, labels=True, veclabels=True)
예제 #2
0
def load_graphs_PROTEINS_full():
    """Fetch the PROTEINS_full graph dataset for graph classification.

    Reference (as announced by the message printed at load time):
        Neumann, M., Garnett R., Bauckhage Ch., Kersting K.:
        Propagation Kernels: Efficient Graph Kernels from Propagated
        Information. MLJ 2015.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels' and 'veclabels', both set to True.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/PROTEINS_full_graph_labels.txt'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets/PROTEINS_full.gspan'

    targets = load_target(labels_url)
    graph_iter = instance_to_graph(input=graphs_url)

    # The citation is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    for message in (
            'Loaded PROTEINS_full graph dataset for  graph classification from:',
            'Neumann, M., Garnett R., Bauckhage Ch., Kersting K.: Propagation Kernels: Efficient Graph',
            'Kernels from Propagated Information. MLJ 2015.'):
        print(message)

    dataset = Bunch(graphs=list(graph_iter),
                    target=targets,
                    labels=True,
                    veclabels=True)
    return dataset
예제 #3
0
def load_graphs_proteins():
    """Fetch the PROTEINS graph dataset for graph classification.

    Reference:
        Dobson, P. D., & Doig, A. J. (2003)
        Distinguishing enzyme structures from non-enzymes without alignments.
        Journal of Molecular Biology, 330, 771-783. doi:10.1016/S0022-2836(03)00628-4

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels' and 'veclabels', both set to True.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/PROTEINS.labels'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets/PROTEINS.gspan'

    targets = load_target(labels_url)
    graph_iter = instance_to_graph(input=graphs_url)

    # The citation is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    for message in (
            'Loaded PROTEINS graph dataset for graph classification from:',
            'Dobson, P. D., & Doig, A. J. (2003)',
            'Distinguishing enzyme structures from non-enzymes without alignments.',
            'Journal of Molecular Biology, 330, 771–783. doi:10.1016/S0022-2836(03)00628-4'):
        print(message)

    dataset = Bunch(graphs=list(graph_iter),
                    target=targets,
                    labels=True,
                    veclabels=True)
    return dataset
예제 #4
0
def load_graphs_enzymes():
    """Fetch the ENZYMES graph dataset for (multiclass) graph classification.

    Reference:
        Schomburg, I., Chang, A., Ebeling, C., Gremse, M., Heldt, C., Huhn, G., & Schomburg, D. (2004).
        BRENDA, the enzyme database: updates and major new developments.
        Nucleic Acids Research, 32, D431-D433. doi:10.1093/nar/gkh081

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels' and 'veclabels', both set to True.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/ENZYMES.labels'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets/ENZYMES.gspan'

    targets = load_target(labels_url)
    graph_iter = instance_to_graph(input=graphs_url)

    # The citation is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    for message in (
            'Loaded ENZYMES graph dataset for (multiclass) graph classification from:',
            'Schomburg, I., Chang, A., Ebeling, C., Gremse, M., Heldt, C., Huhn, G., & Schomburg, D. (2004).',
            'BRENDA, the enzyme database: updates and major new developments.',
            'Nucleic Acids Research, 32, D431–D433. doi:10.1093/nar/gkh081'):
        print(message)

    dataset = Bunch(graphs=list(graph_iter),
                    target=targets,
                    labels=True,
                    veclabels=True)
    return dataset
예제 #5
0
def load_graphs_GDD():
    """Fetch the GDD graph dataset for graph classification.

    Besides the load message, the number of graphs read is printed.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels', set to True, and 'veclabels', set to False.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/GDD/GDD_labels.txt'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets/GDD/graphs.gspan'

    targets = load_target(labels_url)
    # The graphs are collected up front because their count is reported below.
    graphs = list(instance_to_graph(input=graphs_url))

    print('Loaded GDD graph dataset for graph classification.')
    # Count and suffix separated by one space, matching the original
    # two-argument print statement.
    print(' '.join([str(len(graphs)), 'graphs.']))

    dataset = Bunch(graphs=graphs, target=targets, labels=True, veclabels=False)
    return dataset
예제 #6
0
def load_graphs_NCI_AIDS():
    """Fetch the NCI antiHIV graph dataset for graph classification.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels', set to True, and 'veclabels', set to False.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/NCI_AIDS/AIDO99SD_numeric.labels'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets/NCI_AIDS/AIDO99SD.gspan'

    targets = load_target(labels_url)
    graph_iter = instance_to_graph(input=graphs_url)

    # The message is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    print('Loaded NCI antiHIV dataset graph dataset for graph classification.')

    dataset = Bunch(graphs=list(graph_iter),
                    target=targets,
                    labels=True,
                    veclabels=False)
    return dataset
예제 #7
0
def load_graphs_bursi():
    """Fetch the Bursi graph dataset for graph classification.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels', set to True, and 'veclabels', set to False.
    """
    labels_url = 'http://www.bioinf.uni-freiburg.de/~costa/bursi.target'
    graphs_url = 'http://www.bioinf.uni-freiburg.de/~costa/bursi.gspan'

    targets = load_target(labels_url)
    graph_iter = instance_to_graph(input=graphs_url)

    # The message is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    print('Loaded Bursi graph dataset for graph classification.')

    dataset = Bunch(graphs=list(graph_iter),
                    target=targets,
                    labels=True,
                    veclabels=False)
    return dataset
예제 #8
0
def load_graphs_LMdata():
    """Fetch the LMdata graph dataset for graph classification.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'label_dict', the dict object handed to instance_to_graph
        (presumably filled with a label mapping while the graphs are
        parsed -- confirm against instance_to_graph),
        'labels', set to True, and 'veclabels', set to False.
    """
    labels_url = 'http://www.math.unipd.it/~nnavarin/datasets/LMdata/labels.txt.standardized'
    graphs_url = 'http://www.math.unipd.it/~nnavarin/datasets//LMdata/graphs.gspan.standardized'

    targets = load_target(labels_url)

    # Both objects are passed positionally to instance_to_graph; the same
    # dict instance is later exposed on the returned Bunch.
    node_label_map = {}
    label_counter = [1]
    graph_iter = instance_to_graph(graphs_url, node_label_map, label_counter)

    # The message is printed first; the graphs are only collected into a
    # list afterwards, when the Bunch is built.
    print('Loaded LMdata graph dataset for graph classification.')

    dataset = Bunch(graphs=list(graph_iter),
                    label_dict=node_label_map,
                    target=targets,
                    labels=True,
                    veclabels=False)
    return dataset
예제 #9
0
def load_graphs_NCI1():
    """Load the NCI1 graph dataset for graph classification.

    Returns
    -------
    data : Bunch
        Dictionary-like object with the following attributes :
        'graphs', the list of graphs in the dataset in Networkx format,
        'target', the classification labels as returned by load_target,
        'labels', set to True, and 'veclabels', set to False.
    """
    input_target_url = 'http://www.math.unipd.it/~nnavarin/datasets/NCI1/NCI1_labels.txt'
    input_data_url = 'http://www.math.unipd.it/~nnavarin/datasets/NCI1/NCI1_graphs.gspan'
    _target = load_target(input_target_url)
    g_it = instance_to_graph(input=input_data_url)

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('Loaded NCI1 graph dataset for graph classification.')
    return Bunch(graphs=list(g_it),
                 target=_target,
                 labels=True,
                 veclabels=False)