Example #1
# E.g., create three non-overlapping subsets from a shuffled dataset
s = data3.shuffle()
s1 = s[0:50]
s2 = s[50:100]
s3 = s[100:]

rawDat_s1, rawLabels_s1 = s1.data, s1.labels
rawDat_s2, rawLabels_s2 = s2.data, s2.labels

# GRAPH

# Download from the TUDataset repository
# TODO: will be changed
datasets.get_dataset("COIL-RAG")
gdata1 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG", "Letter-h", readergraph, pre_transform=gTransfItem())
gdata2 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG", "Letter-h", readergraph, transform=gTransfItem())
gdata3 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG", "Letter-h", readergraph)

print("original data get item: ", gdata3[0].x.nodes[0]['attributes'])
print("transform data get item: ", gdata2[0].x.nodes[0]['attributes'])
print("pre_transformed data get item:", gdata1[0].x.nodes[0]['attributes'])

print("original data stored value: ", gdata3.data[0].nodes[0]['attributes'])
print("transform data stored value: ", gdata2.data[0].nodes[0]['attributes'])
print("pre_transformed data stored value: ", gdata1.data[0].nodes[0]['attributes'])


# ####
# data1_perm = data1.shuffle()
# data0_key = data1_perm.indices[0]
Example #2

def edgeDissimilarity(a, b):
    # Binary edge dissimilarity: 1 if the edge labels differ, 0 otherwise
    D = 0
    if a['labels'] != b['labels']:
        D = 1
    return D


def readergraph(path):
    # NOTE: the TUDataset reader resolves the dataset by name; `path` is unused here
    graphs_nx = reader.tud_to_networkx("Mutagenicity")
    classes = [g.graph['classes'] for g in graphs_nx]
    return graphs_nx, classes


print("Loading...")
data1 = graph_nxDataset(
    "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Mutagenicity",
    "Mutagenicity", readergraph)
# Keep only connected graphs
cleanData = [(g, idx, label)
             for g, idx, label in zip(data1.data, data1.indices, data1.labels)
             if nx.is_connected(g)]
cleanData = np.asarray(cleanData, dtype=object)
data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Mutagenicity")
data1 = data1[0:10]  # small subset for the pairwise-distance demo

# Graph matching distance built from the node/edge dissimilarities above
graphDist = BMF(nodeDissimilarity, edgeDissimilarity)
x1 = graphDist.pdist(data1.data, forceSym=True)   # symmetrized pairwise distances
x2 = graphDist.pdist(data1.data, forceSym=False)  # raw (possibly asymmetric) distances
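
# Quick sanity check on forceSym, assuming pdist returns a square matrix of
# pairwise distances (an assumption about the eabc_v2 API): the symmetrized
# result should equal its transpose; the raw one need not.
m1 = np.asarray(x1, dtype=float)
m2 = np.asarray(x2, dtype=float)
print("forceSym=True  symmetric:", np.allclose(m1, m1.T))
print("forceSym=False symmetric:", np.allclose(m2, m2.T))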
Example #3
# @jitclass
def nodeDissimilarity(a, b):
    # Euclidean distance between node attribute vectors, normalized by sqrt(2)
    return np.linalg.norm(np.asarray(a['attributes']) - np.asarray(b['attributes'])) / np.sqrt(2)

# @jitclass
def edgeDissimilarity(a, b):
    # Edges contribute nothing to the distance here: constant zero dissimilarity
    return 0.0

def readergraph(path):
    # NOTE: the TUDataset reader resolves the dataset by name; `path` is unused here
    graphs_nx = reader.tud_to_networkx("Letter-high")
    classes = [g.graph['classes'] for g in graphs_nx]
    return graphs_nx, classes

print("Loading...")
data1 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Letter-high", "LetterH", reader = readergraph)
# Remove disconnected and empty (null) graphs
cleanData = []
for g, idx, label in zip(data1.data, data1.indices, data1.labels):
    if not nx.is_empty(g) and nx.is_connected(g):
        cleanData.append((g, idx, label))

cleanData = np.asarray(cleanData, dtype=object)
data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Letter")
data1 = data1[0:100]

# Test extracted indices after shuffling
data1 = data1.shuffle()
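
# Follow-up sketch: a shuffled view should permute the parallel data, labels
# and indices lists consistently. This uses only the attributes already shown
# above; anything else about graph_nxDataset internals is assumed.
print("first shuffled index keys:", data1.indices[:5])
print("their labels:             ", data1.labels[:5])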
Example #4
def main():

    # Reproduces the body of DEAP's eaSimple:
    # def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
    #              halloffame=None, verbose=__debug__):
    ####################
    random.seed(64)
    verbose = True

    population = toolbox.population(n=POP_SIZE)
    cxpb = CXPROB
    mutpb = MUTPROB
    ngen = N_GEN

    halloffame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    ###################

    print("Loading...")
    data1 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Mutagenicity",
        "Mutagenicity", readergraph)
    # Keep only connected graphs
    cleanData = [
        (g, idx, label)
        for g, idx, label in zip(data1.data, data1.indices, data1.labels)
        if nx.is_connected(g)
    ]
    cleanData = np.asarray(cleanData, dtype=object)
    data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Mutagenicity")

    extract_func = randomwalk_restart.extr_strategy(max_order=6)
    subgraph_extr = Extractor(extract_func)

    # Evaluate the individuals with an invalid fitness
    print("Initializing population...")
    # one bucket of 1000 randomly extracted subgraphs per individual
    subgraphs = [
        subgraph_extr.randomExtractDataset(data1, 1000) for _ in population
    ]
    #    mapWithConst = partial(toolbox.evaluate,granulationBucket=subgraphs)
    invalid_ind = [ind for ind in population if not ind.fitness.valid]

    fitnesses = toolbox.map(toolbox.evaluate, zip(invalid_ind, subgraphs))

    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)

    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    print("Start evolution")
    for gen in range(1, ngen + 1):
        # Select the next generation individuals
        offspring = toolbox.select(population, len(population))

        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        # Evaluate the individuals with an invalid fitness

        # fresh random subgraph buckets for this generation (one per individual)
        subgraphs = [
            subgraph_extr.randomExtractDataset(data1, 1000) for _ in population
        ]
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        #mapWithConst = partial(toolbox.evaluate,granulationBucket=subgraphs)
        fitnesses = toolbox.map(toolbox.evaluate, zip(invalid_ind, subgraphs))

        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Replace the current population by the offspring
        population[:] = offspring

        # Append the current generation statistics to the logbook
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)

    return population, logbook
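
# Minimal driver in the usual DEAP style (a sketch; assumes the toolbox,
# POP_SIZE, CXPROB, MUTPROB and N_GEN are defined at module level, as the
# code above implies).
if __name__ == "__main__":
    final_pop, log = main()
    best = tools.selBest(final_pop, 1)[0]  # best individual of the final population
    print("best fitness:", best.fitness.values)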
Example #5
def main():
    """
    Reproducing GRALG optimization with the mu+lambda strategy of evolution.
    """
    ####################
    random.seed(64)
    verbose = True

    population = toolbox.population(n=MU)
    cxpb = CXPROB
    mutpb = MUTPROB
    ngen = N_GEN
    mu = MU
    lambda_ = LAMBDA

    halloffame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    ###################

    print("Loading...")
    data1 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/IAM/Letter3/Training/",
        "LetterH",
        reader=IAMreadergraph)
    data2 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/IAM/Letter3/Validation/",
        "LetterH",
        reader=IAMreadergraph)
    # data1 = data1.shuffle()
    # data2 = data2.shuffle()
    # Remove disconnected and empty (null) graphs
    cleanData = []
    for dataset in [data1, data2]:
        for g, idx, label in zip(dataset.data, dataset.indices,
                                 dataset.labels):
            if not nx.is_empty(g) and nx.is_connected(g):
                cleanData.append((g, idx, label))

    cleanData = np.asarray(cleanData, dtype=object)
    # normalize the 'coords' node attribute over the TR/VS split (first 750 graphs are TR)
    normalize('coords', cleanData[:750, 0], cleanData[750:, 0])

    # Slightly different from the dataset used in pygralg
    dataTR = graph_nxDataset([cleanData[:750, 0], cleanData[:750, 2]],
                             "LetterH",
                             idx=cleanData[:750, 1])
    dataVS = graph_nxDataset([cleanData[750:, 0], cleanData[750:, 2]],
                             "LetterH",
                             idx=cleanData[750:, 1])
    del data1
    del cleanData

    print("Setup...")

    extract_func = randomwalk_restart.extr_strategy(max_order=6)
    # alternative: extract_func = breadthFirstSearch.extr_strategy(max_order=6)
    subgraph_extr = Extractor(extract_func)

    # Expand each graph by repeated sweeps of subgraph orders 1..5
    # (k advances by 6 per sweep until 50, i.e. ~45 extractions per graph);
    # the two original loops were identical, so they share one helper.
    def expandDataset(dataset, bucket):
        for i, x in enumerate(dataset):
            k = 0
            while k < 50:
                for j in range(1, 6):
                    subgraph_extr.max_order = j
                    bucket.add_keyVal(dataset.to_key(i), subgraph_extr.extract(x))
                k += 6

    expTRSet = dataTR.fresh_dpcopy()
    expandDataset(dataTR, expTRSet)
    expVSSet = dataVS.fresh_dpcopy()
    expandDataset(dataVS, expVSSet)
    # Evaluate the individuals with an invalid fitness
    print("Initializing population...")
    subgraphs = subgraph_extr.randomExtractDataset(dataTR, 1260)  # one shared granulation bucket
    invalid_ind = [ind for ind in population if not ind.fitness.valid]

    fitnesses = toolbox.map(
        functools.partial(toolbox.evaluate,
                          granulationBucket=subgraphs,
                          trEmbeddBucket=expTRSet,
                          vsEmbeddBucket=expVSSet,
                          TRindices=dataTR.indices,
                          VSindices=dataVS.indices,
                          TRlabels=dataTR.labels,
                          VSlabels=dataVS.labels), invalid_ind)

    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)

    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    for gen in range(1, ngen + 1):
        # Vary the population
        offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)

        #Evaluate invalid of modified individual
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(
            functools.partial(toolbox.evaluate,
                              granulationBucket=subgraphs,
                              trEmbeddBucket=expTRSet,
                              vsEmbeddBucket=expVSSet,
                              TRindices=dataTR.indices,
                              VSindices=dataVS.indices,
                              TRlabels=dataTR.labels,
                              VSlabels=dataVS.labels), invalid_ind)

        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        # Update the statistics with the new population
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)

    return population, logbook
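
# Fitness evaluation dominates the run time here, and toolbox.map is the
# standard DEAP hook for parallelism. A common pattern (a sketch; assumes the
# evaluated individuals and the partial'd arguments are picklable):
import multiprocessing

if __name__ == "__main__":
    pool = multiprocessing.Pool()
    toolbox.register("map", pool.map)  # evaluate individuals in parallel
    population, logbook = main()
    pool.close()
    pool.join()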