Example #1
# pandas is required below; NeuralNetwork, OneHotEncoder, and StandardNormalizer
# are project-local classes assumed to be imported from the surrounding codebase.
import pandas as pd

def multiprocess_func(test_set, train_set, fold, fitness_file, output_file,
                      dataRetriever, cost_func, current_data_set,
                      cross_over_prob=0.7, mutation_rate=0.3, maxIter=1000,
                      batch_size=0.6, population_size=110,
                      network_architecture=[15], pb_actor=None):
    
    print("=========================")
    print("Fold Num: ", fold)
    # Encode Data
    test_set = test_set.reset_index(drop=True)
    train_set = train_set.reset_index(drop=True)
    ohe = OneHotEncoder()
    discrete_attr = dataRetriever.getDescreteAttributes()
    if dataRetriever.getDataClass() in discrete_attr:
        discrete_attr.remove(dataRetriever.getDataClass())

    train_set = ohe.train_fit(train_set, discrete_attr)
    test_set = ohe.fit(test_set)

    # Normalize Data
    sn = StandardNormalizer(train_set[dataRetriever.getContinuousAttributes()])
    train_set[dataRetriever.getContinuousAttributes()] = sn.train_fit()
    test_set[dataRetriever.getContinuousAttributes()] = sn.fit(test_set[dataRetriever.getContinuousAttributes()])

    # Train the network; the architecture varies with the data set
    nn = NeuralNetwork(train_set, len(network_architecture), network_architecture, dataRetriever.getPredictionType(), dataRetriever.getDataClass())
    
    fitnesses = nn.differential_evolution(population_size, maxIter, batch_size, mutation_rate, cross_over_prob, cost_func)
    final = nn.test(test_set.drop(dataRetriever.getDataClass(), axis=1))
    output = nn._feed_forward(test_set.drop(dataRetriever.getDataClass(), axis=1), testing=True)
    actual = test_set[dataRetriever.getDataClass()]

    fitness_pd = pd.DataFrame(fitnesses, columns=["Max_Weight", "Min_Weight", "Mean_Fitness"])
    fitness_pd.to_csv(fitness_file, index=False)

    print("Fold Performance:")
    if dataRetriever.getPredictionType() == "classification":
    # ===================== Classification =====================
        correct = 0
        for i, row in enumerate(final):
            if row == actual.iloc[i]: correct += 1

        acc = correct/len(test_set)

        print(f"Accuracy: {acc}")
        output_pd = pd.DataFrame({'Truth':actual.to_list(), 'Predicted':final})
    
        output_pd.to_csv(output_file, index=False)
        return acc
    else:
        output = output.reshape(output.shape[0])
        
        # R^2 = 1 - SS_res / SS_tot
        res = actual - output
        r2 = 1 - ((res**2).sum() / (((actual - actual.mean())**2).sum()))
        print(f"R2: {r2}")
        output_pd = pd.DataFrame({'Truth':actual.to_list(), 'Predicted':output})
    
        output_pd.to_csv(output_file, index=False)
        return float(r2)
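
A minimal driver sketch for the function above, assuming pre-split (train, test) fold pairs; fold_splits, dataRetriever, and cost_func are hypothetical placeholders for objects from the surrounding project, not part of the original snippet:

# Hypothetical driver loop; the file-name pattern and "glass" data set
# name are illustrative assumptions.
scores = []
for fold, (train_set, test_set) in enumerate(fold_splits, start=1):
    score = multiprocess_func(test_set, train_set, fold,
                              f"fitness_fold{fold}.csv",
                              f"output_fold{fold}.csv",
                              dataRetriever, cost_func,
                              current_data_set="glass")
    scores.append(score)
print("Mean fold score:", sum(scores) / len(scores))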
Example #2
# Fragment of a single cross-validation fold; the numpy/matplotlib imports and
# the per-fold `metrics` list (cut off above this fragment) are restored here.
import numpy as np
import matplotlib.pyplot as plt

metrics = []

train_set = train_set.reset_index(drop=True)
ohe = OneHotEncoder()
discrete_attr = dataRetriever.getDescreteAttributes()
if dataRetriever.getDataClass() in discrete_attr:
    discrete_attr.remove(dataRetriever.getDataClass())

train_set = ohe.train_fit(train_set, discrete_attr)
test_set = ohe.fit(test_set)

# Normalize Data
sn = StandardNormalizer(train_set[dataRetriever.getContinuousAttributes()])
train_set[dataRetriever.getContinuousAttributes()] = sn.train_fit()
test_set[dataRetriever.getContinuousAttributes()] = sn.fit(test_set[dataRetriever.getContinuousAttributes()])

# Train the network; the architecture ([6, 16]) is chosen per data set
nn = NeuralNetwork(train_set, 2, [6,16], dataRetriever.getPredictionType(), dataRetriever.getDataClass())
# Particle swarm optimization: a 70-particle swarm (assumed from the first
# positional argument) run for up to 500 iterations
fitness_matrix, average_fitness = nn._particle_swarm_optimize(70, max_iter=500)


predictions = nn._feed_forward(test_set.drop(dataRetriever.getDataClass(), axis=1), testing=True)

actual = test_set[dataRetriever.getDataClass()]

# Accuracy bookkeeping restored from context (the fragment used `metrics`
# without filling it): compare hard predictions to the truth, as in Example #1.
final = nn.test(test_set.drop(dataRetriever.getDataClass(), axis=1))
correct = 0
for i, row in enumerate(final):
    if row == actual.iloc[i]: correct += 1
metrics.append(correct / len(test_set))

metrics = np.asarray(metrics)

# Plot fitness trajectories of three sampled particles plus the best particle;
# the legend labels are particle indices kept from the original snippet.
fig, ax = plt.subplots(3)
ax[0].plot(fitness_matrix[:, 0], label="1")
ax[0].plot(fitness_matrix[:, 1], label="34")
ax[0].plot(fitness_matrix[:, 2], label="68")
ax[0].plot(fitness_matrix[:, 3], label="Best")
ax[0].legend()
print(f"Average Accuracy: {np.asarray(metrics).mean()} ± {metrics.std()}")
Example #3
# itertools drives the architecture grid search; DataRetriever, OneHotEncoder,
# and NeuralNetwork are project-local classes assumed to be imported.
import itertools

def network_tuner(*nodes_per_hidden_layer):
    """
    Search for the best network architecture over the given candidates.

    Each positional argument is an iterable of candidate node counts for one
    hidden layer; every combination is trained and scored. Change the
    performance metric to match the data set type (regression vs.
    classification).
    """
    
    MSEs = []

    bestNetwork = {}
    learning_rate = 0.0001
    maxItter = 500
    batch_size = .5

    dataRetriever = DataRetriever("../Datasets/metadata.json")
    dataRetriever.retrieveData("glass")
    dataset = dataRetriever.getDataSet().dropna()


    dataset = dataset.reset_index(drop=True)

    # This line is used to normalize the data for Forest Fires
    # dataset[dataRetriever.getDataClass()] = np.log(dataset[dataRetriever.getDataClass()]+0.1)

    # z-score the continuous attributes
    cont = dataRetriever.getContinuousAttributes()
    dataset[cont] = (dataset[cont] - dataset[cont].mean()) / dataset[cont].std()

    test_set = dataset.sample(frac=0.1, random_state=69)
    train_set = dataset.drop(test_set.index)
    test_set = test_set.reset_index(drop=True)
    train_set = train_set.reset_index(drop=True)

    ohe = OneHotEncoder()
    discrete_attr = dataRetriever.getDescreteAttributes()
    if dataRetriever.getDataClass() in discrete_attr:
        discrete_attr.remove(dataRetriever.getDataClass())

    datasetEncoded = ohe.train_fit(train_set, discrete_attr)
    testEncoded = ohe.fit(test_set)


    output = None
    nn = NeuralNetwork(datasetEncoded, 0, [], dataRetriever.getPredictionType(), dataRetriever.getDataClass())
    for i in range(maxItter):
        # No initial feed-forward call is needed: back-propagation starts with
        # its own feed-forward pass. batch_size is the fraction of the data
        # used per batch.
        output = nn._back_propagate(learning_rate=learning_rate, batch_size=batch_size)


    final = nn.test(testEncoded.drop(dataRetriever.getDataClass(), axis=1))
    output = nn._feed_forward(testEncoded.drop(dataRetriever.getDataClass(), axis=1), testing=True)
    actual = testEncoded[dataRetriever.getDataClass()]


    # ===================== Classification =====================
    correct = 0
    for i, row in enumerate(final):
        if row == actual.iloc[i]: correct += 1
    acc = correct / len(test_set)

    # final = final.reshape(final.shape[0])

    # MSE = ((actual-final)**2).mean()
    # MSEs.append(MSE)
    # Baseline: the zero-hidden-layer network trained above
    bestNetwork['network'] = nn
    bestNetwork['acc'] = acc
    bestNetwork['arc'] = [0]
    # # ============================================

    # # ============ Compare Acc to Most Common Class

    values = test_set[dataRetriever.getDataClass()].value_counts()


    # USED FOR CLASSIFICATION
    # print(f'Accuracy: {acc}')
    # print(f'Max Class Prior: {values.max()/values.sum()}')
    # print(f"Class Distribution:\n{values}")
    # print("Final: ", final)
    # print("Actual: ", list(actual))
    # print()



    numOfLayer = len(nodes_per_hidden_layer)
    print("Number of Hidden Layers: ", numOfLayer)
    for layer in range(numOfLayer):
        print(f"Layer Number: {layer + 1}")
        combinations = list(itertools.product(*nodes_per_hidden_layer[:layer+1]))

        for combo in combinations:

            output = None
            print("Node Combination: ",list(combo))
            print(combo)

            nn = NeuralNetwork(datasetEncoded, layer, list(combo), dataRetriever.getPredictionType(), dataRetriever.getDataClass())
            for i in range(maxItter):
                # Back-propagation starts with its own feed-forward pass;
                # batch_size is the fraction of the data used per batch.
                output = nn._back_propagate(learning_rate=learning_rate, batch_size=batch_size)

            final = nn.test(testEncoded.drop(dataRetriever.getDataClass(), axis=1))
            output = nn._feed_forward(testEncoded.drop(dataRetriever.getDataClass(), axis=1), testing=True)
            actual = testEncoded[dataRetriever.getDataClass()]

            # ===================== Classification =====================
            correct = 0
            for i, row in enumerate(final):
                if row == actual.iloc[i]: correct += 1

            acc = correct / len(test_set)
            # # # ============================================

            # # # ============ Compare Acc to Most Common Class

            values = test_set[dataRetriever.getDataClass()].value_counts()

            # USED FOR CLASSIFICATION
            # print(f'Accuracy: {acc}')
            # print(f'Max Class Prior: {values.max()/values.sum()}')
            # # print(f"Class Distribution:\n{values}")
            # print("Final: ", final)
            # print("Actual: ", list(actual))
            # print()

            if acc > bestNetwork['acc']:
                bestNetwork['network'] = nn
                bestNetwork['acc'] = acc
                bestNetwork['arc'] = combo

            # final = final.reshape(final.shape[0])

            # MSE = ((actual-final)**2).mean()
            # MSEs.append(MSE)
            # if MSE < bestNetwork['acc']:
            #     bestNetwork['network'] = nn
            #     bestNetwork['acc'] = MSE
            #     bestNetwork['arc'] = combo

            



    return bestNetwork  # , MSEs
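
A minimal usage sketch for network_tuner: each positional argument is an iterable of candidate node counts for one hidden layer (the values below are illustrative only, not from the original):

# Try one- and two-hidden-layer architectures over these candidate widths.
best = network_tuner([2, 4, 8], [4, 8])
print("Best architecture:", best['arc'], "accuracy:", best['acc'])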
Example #4

# Truncated fragment: the opening of the cross-validation fold loop was cut
# off, which is why this block starts indented and exits early with `break`.
    ohe = OneHotEncoder()
    discrete_attr = dataRetriever.getDescreteAttributes()
    if dataRetriever.getDataClass() in discrete_attr:
        discrete_attr.remove(dataRetriever.getDataClass())

    train_set = ohe.train_fit(train_set, discrete_attr)
    test_set = ohe.fit(test_set)

    # Normalize Data
    sn = StandardNormalizer(train_set[dataRetriever.getContinuousAttributes()])
    train_set[dataRetriever.getContinuousAttributes()] = sn.train_fit()
    test_set[dataRetriever.getContinuousAttributes()] = sn.fit(
        test_set[dataRetriever.getContinuousAttributes()])

    # Train the network; the architecture is chosen per data set
    nn = NeuralNetwork(train_set, 2, [2, 2], dataRetriever.getPredictionType(),
                       dataRetriever.getDataClass())
    nn.train(maxIter, learning_rate, batch_size)

    # predictions = nn.test(test_set.drop(dataRetriever.getDataClass(), axis=1))

    # # ca = ClassifierAnalyzer(test_set[dataRetriever.getDataClass()], predictions)
    # correct = 0
    # actual = test_set[dataRetriever.getDataClass()]
    # for i, row in enumerate(predictions):
    #     if row == actual.iloc[i]: correct += 1
    # metrics.append(correct/len(actual))
    break

metrics = np.asarray(metrics)
# Expected accuracy of a uniform-prior guess, and the standard deviation of
# the mean-accuracy estimate across the 10 folds (binomial sampling error)
prior = 1 / dataset[dataRetriever.getDataClass()].nunique()
sampling_sd = np.sqrt((prior * (1 - prior)) / 10)
train_set = train_set.reset_index(drop=True)

ohe = OneHotEncoder()

if dataRetriever.getDataClass() in discrete_attr:
    discrete_attr.remove(dataRetriever.getDataClass())

datasetEncoded = ohe.train_fit(train_set, discrete_attr)
testEncoded = ohe.fit(test_set)

# ======================= Create Best Individual ================
print(title_text)

best = NeuralNetwork(datasetEncoded, 1, [25],
                     dataRetriever.getPredictionType(),
                     dataRetriever.getDataClass())
fitnesses = best.genetic_algorithm(population_size, maxItter, batch_size,
                                   mutation_rate, 10,
                                   cost_func[current_data_set])

# ======================= Test Best Individual ================
final = best.test(testEncoded.drop(dataRetriever.getDataClass(), axis=1))
output = best._feed_forward(testEncoded.drop(dataRetriever.getDataClass(),
                                             axis=1),
                            testing=True)
actual = testEncoded[dataRetriever.getDataClass()]
if dataRetriever.getPredictionType() == "classification":
    # ## ===================== Classification =================
    print("Best")
    correct = 0
Example #5

ohe = OneHotEncoder()
discrete_attr = dataRetriever.getDescreteAttributes()

if dataRetriever.getDataClass() in discrete_attr:
    discrete_attr.remove(dataRetriever.getDataClass())

datasetEncoded = ohe.train_fit(train_set, discrete_attr)
testEncoded = ohe.fit(test_set)

# ======================= Create Best Individual ================
print(title_text)

best = NeuralNetwork(datasetEncoded, len(nodes_per_layer), nodes_per_layer,
                     dataRetriever.getPredictionType(),
                     dataRetriever.getDataClass())
fitnesses = best.differential_evolution(population_size, maxItter, batch_size,
                                        mutation_rate, cross_over_prob,
                                        cost_func[current_data_set])

# ======================= Test Best Individual ================
final = best.test(testEncoded.drop(dataRetriever.getDataClass(), axis=1))
output = best._feed_forward(testEncoded.drop(dataRetriever.getDataClass(),
                                             axis=1),
                            testing=True)
actual = testEncoded[dataRetriever.getDataClass()]
if dataRetriever.getPredictionType() == "classification":
    # ## ===================== Classification =================
    print("Best")
    correct = 0