Ejemplo n.º 1
0
 def display(self, data_list):
     """Draw one bar per entry of ``data_list`` on this object's axes.

     The figure and its 6x1 grid of subplots are created on the first call
     (while ``display_init`` compares equal to ``False``) and reused on
     every later call.
     """
     # Lazily build the figure once and remember that it exists.
     if self.display_init == False:
         figure, axes = plt.subplots(6, 1)
         self.fig, self.axes = figure, axes
         self.display_init = True

     # One bar description per datum, in input order.
     bars = [plot.gen_bar(entry) for entry in data_list]
     plot.plot_bar(bars, self.axes)

     # Tiny pause; presumably gives the GUI event loop a chance to redraw.
     plt.pause(0.000001)
from sklearn.utils import class_weight

import cnn
import data
import plot

if __name__ == '__main__':
    # Script entry point: build the data generators, plot the class
    # distribution of the training set, derive normalised class weights and
    # construct/compile the CNN.
    train_generator, valid_generator, test_generator = data.get_image_generators(preprocess_input)

    num_classes = train_generator.num_classes  # Counting BACKGROUND Class
    input_shape = (300, 200, 3)
    print("Test: " + sys.argv[1])
    epochs = 100

    # Sorted unique labels and how often each occurs in the training set.
    values, counts = np.unique(train_generator.labels, return_counts=True)

    plot.plot_bar(values, counts)

    # `classes` and `y` are passed by keyword: recent scikit-learn releases
    # make them keyword-only, and older releases accept keywords as well.
    class_weights = class_weight.compute_class_weight('balanced',
                                                      classes=values,
                                                      y=train_generator.labels)
    # Rescale so the largest weight is exactly 1.
    class_weights = class_weights / class_weights.max()

    # The returned weights are ordered like `classes`, so pair them by
    # position instead of indexing the weight array with the label value
    # (which is only correct when labels are exactly 0..n-1).
    class_weights_dict = dict(zip(values, class_weights))

    model = cnn.get_resnet_model(num_classes, input_shape)
    # model = cnn.get_xception_model(num_classes, input_shape)

    cnn.compile_cnn(model)
Ejemplo n.º 3
0
    for item in raw:
        if item[0] >= 604800 and item[0] < 1799999:
            data['604800-1799999'] += item[1]

    data['1800000-3599999'] = 0
    for item in raw:
        if item[0] >= 1800000 and item[0] < 3599999:
            data['1800000-3599999'] += item[1]

    data['3600000-10000000'] = 0
    for item in raw:
        if item[0] >= 3600000 and item[0] < 10000000:
            data['3600000-10000000'] += item[1]

    data['10000000-'] = 0
    for item in raw:
        if item[0] >= 10000000:
            data['10000000-'] += item[1]

    return data


if __name__ == '__main__':
    # Script entry point: process the raw TTL dump, load the TTL
    # distribution CSV, bucket it with separate(), and plot the results.
    read_raw('dns_ttl.csv')
    data = read_csv('ttl_distribution.csv')
    sp = separate(data)
    # NOTE(review): `data` is written back to the same file it was just read
    # from -- confirm this round-trip is intentional.
    write_csv(data, 'ttl_distribution.csv')
    # NOTE(review): plot_scatter/plot_bar are not matplotlib.pyplot functions,
    # so `plt` is presumably a project-local plotting module -- confirm.
    plt.plot(data)
    plt.plot_scatter(data)
    plt.plot_bar(sp)
# Scatter of targets y1 vs y3, with marker size from x2 and colour from x1.
# x/y are passed by keyword: seaborn >= 0.12 only accepts `data` positionally,
# and older releases accept the keywords too.
sns.scatterplot(x='y1', y='y3', data=data_gdoe['xy0'], size='x2', hue='x1', linewidth=0, cmap='RdYlGn_r')
plt.figure()
# Filled contour (20 levels) of y1 over the (x1, x2) plane.
# NOTE(review): this reads from `data` while the surrounding calls use
# `data_gdoe` -- confirm which dataset is intended.
plt.tricontourf(data['xy0']['x1'],
                data['xy0']['x2'],
                data['xy0']['y1'],20, cmap='RdYlGn_r')
plt.colorbar()
# plot distribution of target array
plot_distribution('target_distribution', 5, 5, data_gdoe['y0'].columns, data_gdoe['y0'].values)
# plot distribution of target array after normalisation
plot_distribution('target_distribution_normalised', 5, 5, data_gdoe['y0'].columns, N.t2n(data_gdoe['y0'].values))
# plot distribution of input array; can also be helpful in setting calibration constraints
# NOTE(review): the figure name says "normalised" but the raw values are passed -- confirm.
plot_distribution('input_distribution_normalised', 3, 3, data_gdoe['x0'].columns, data_gdoe['x0'].values)
# plot the interdependencies among features
plot_covariance_matrix(data_gdoe['x0'].values, data_gdoe['x0'].columns)
# plot relevance between input and output arrays using a random forest with n-estimators = 50
relevance = ml_relevance_matrix_etr(data_gdoe['x0'].values, data_gdoe['y0'].values, 50)
plot_bar('relevance', 5, 5, data_gdoe['y0'].columns,data_gdoe['x0'].columns[0:], relevance[0:,:])



#%% making model1 : An Iterative process
# Fix the TensorFlow random seed and reset the Keras backend session before
# building the model.
tf.random.set_seed(1)
tf.keras.backend.clear_session()
# Build model1 without saving it and without a name.
model1 = make_model1(save=False, name=None)
# Starts empty; presumably filled with evaluation results later -- confirm.
r_evaluator = []
# NOTE(review): judging by its name, this loads the files under
# 'dynamic_files/' and evaluates them with model1; the meaning of the
# trailing None argument is not visible here -- confirm against the helper.
dat_dyn_list, r_values = generate_dyns_from_folder('dynamic_files/', model1 ,data,N, None)
for i in range(1):
    history_stage1 = model1.fit(N.f2n(data['x_trn'].values),
                                    N.t2n(data['y_trn'].values),
                                    epochs = 100,
                                    batch_size= 500,
                                    validation_data=(N.f2n(data['x_vld'].values),N.t2n(data['y_vld'].values)),
Ejemplo n.º 5
0
 def plotCategoryCount(self, fileName=''):
     """Plot the vocabulary table's categories against their counts.

     ``fileName`` is accepted but not used by this method.
     """
     categories = self.vocTable.category
     counts = self.vocTable.categoryCount
     plot.plot_bar(categories, counts)
Ejemplo n.º 6
0
trigram_accuracy = taggers.evaluate_accuracy(trigram_tagger, test_set)

# ------------------------------------------------------------
# Backoff model: build it, then score it on the test set.
# ------------------------------------------------------------

backoff_model = taggers.backoff_model(all_words, all_tagged_words, train_set)
backoff_accuracy = taggers.evaluate_accuracy(backoff_model, test_set)

# ------------------------------------------------------------
# Plotting and metrics: one bar per tagger, in the order listed.
# ------------------------------------------------------------

tagger_names = [
    "Default",
    "Regex",
    "Lookup",
    "Unigram",
    "Bigram",
    "Trigram",
    "Backoff",
]
tagger_accuracies = [
    default_accuracy,
    regex_accuracy,
    lookup_accuracy,
    unigram_accuracy,
    bigram_accuracy,
    trigram_accuracy,
    backoff_accuracy,
]
plot.plot_bar(tagger_names, tagger_accuracies, "all-taggers")

# Precision/recall/F-measure and confusion matrix for the backoff model,
# both computed with the "news" argument.
prec_rec_f1 = taggers.evaluate_precision_recall_fmeasure(brown, "news", backoff_model)
cm = taggers.create_confusion_matrix(brown, "news", backoff_model)

# ------------------------------------------------------------
# Task 2
# ------------------------------------------------------------

# Training sizes to try (powers of two: 1, 2, 4, ..., 2**15) and the list
# that will collect the accuracy measured for each size.
sizes = np.power(2, np.arange(16))
accuracies = []
Ejemplo n.º 7
0
def ttl_bar():
    """Plot a bar chart of how many TTL values fall into each fixed range.

    Each line of ``../data/3mths_ttl.txt`` is evaluated as a Python
    expression; its third element is taken as an integer TTL. The TTLs are
    counted per bucket and the ordered ``label -> count`` mapping is handed
    to ``plt.plot_bar``.

    Bucket labels are inclusive on both ends (``'0-299'``), so each bucket
    counts ``low <= ttl < next_low``. The final ``'10000000-'`` bucket is
    open-ended. Values outside every bucket (negative TTLs and the gap noted
    below) are not counted.

    Returns:
        None. The result is only plotted.
    """
    from bisect import bisect_right

    raw = []
    with io.open("../data/3mths_ttl.txt", "r") as f:
        for line in f:
            # NOTE(review): eval() executes whatever expression the line
            # contains. If this file is not fully trusted, switch to
            # ast.literal_eval.
            raw.append(int(tuple(eval(line))[2]))

    # Runs of equal-width buckets: (first lower bound, width, bucket count).
    # NOTE(review): the second run ends at 10799 and the third starts at
    # 14400, so TTLs in 10800..14399 are not counted by any bucket. Kept as
    # in the original bucket layout -- confirm whether that gap is intended.
    uniform_runs = (
        (0, 300, 12),
        (3600, 600, 12),
        (14400, 1800, 40),
        (86400, 3600, 24),
        (172800, 21600, 8),
    )

    # Every bucket as (label, inclusive lower bound, exclusive upper bound),
    # in ascending order; an upper bound of None means "no upper limit".
    buckets = []
    for start, width, count in uniform_runs:
        for i in range(count):
            low = start + i * width
            high = low + width
            buckets.append(('{}-{}'.format(low, high - 1), low, high))

    # Irregular tail. The exclusive upper bound is one past the inclusive
    # bound shown in the label, so boundary values such as 604799 are
    # counted instead of being dropped.
    buckets.extend([
        ('345600-604799', 345600, 604800),
        ('604800-1799999', 604800, 1800000),
        ('1800000-3599999', 1800000, 3600000),
        ('3600000-10000000', 3600000, 10000000),
        ('10000000-', 10000000, None),
    ])

    # Pre-seed every label with 0 so empty buckets still appear, in order.
    ttl_dist = OrderedDict((label, 0) for label, _, _ in buckets)

    # Single pass over the data: binary-search the bucket whose lower bound
    # is the greatest one not exceeding the TTL, then check its upper bound.
    lows = [low for _, low, _ in buckets]
    for item in raw:
        pos = bisect_right(lows, item) - 1
        if pos < 0:
            continue  # below the first bucket
        label, _, high = buckets[pos]
        if high is None or item < high:
            ttl_dist[label] += 1

    plt.plot_bar(ttl_dist)
Ejemplo n.º 8
0
 def plot_bar(self) -> None:
     """Train, print the result, and plot it as a bar chart.

     The chart is produced by the module-level ``plot_bar`` function, which
     receives this object's class name, the value returned by ``train()``,
     the label "λ" and ``self.a``.
     """
     trained = self.train()
     print(trained)
     title = self.__class__.__name__
     plot_bar(title, trained, "λ", self.a)