Example #1
0
    def process_dataset():
        """Load the dataset named by ``ui.dataset`` and shape it for ``ui.mode``.

        Reads ``ui`` from the enclosing scope (``dataset``, ``mode`` and
        ``n_channel``).

        Returns:
            Tuple ``(trainX, trainY, testX, testY)``. Labels are one-hot
            encoded unless ``ui.mode`` is 'hybrid'.
        """

        def to_rgb_32(images):
            # Resize every image to 32 x 32 and expand gray to 3 channels,
            # then stack the results into one float32 array.
            converted = [
                cv2.cvtColor(cv2.resize(img, (32, 32)), cv2.COLOR_GRAY2BGR)
                for img in images
            ]
            return np.concatenate([arr[np.newaxis]
                                   for arr in converted]).astype('float32')

        # SECURITY: eval() executes arbitrary Python. ui.dataset must come from
        # a trusted source; consider replacing this with an explicit
        # name -> dataset-module mapping.
        str_to_dataset = eval(ui.dataset)

        (trainX, trainY), (testX, testY) = str_to_dataset.load_data()

        if ui.mode == 'load':  # loader mode: display a sample of the images
            # summarise loaded dataset
            print('Train: X=%s, y=%s' % (trainX.shape, trainY.shape))
            print('Test: X=%s, y=%s' % (testX.shape, testY.shape))

            for i in range(9):  # plot the first nine images on a 3 x 3 grid
                pyplot.subplot(330 + 1 + i)
                pyplot.imshow(trainX[i])
            pyplot.show()  # display the figure

        is_grayscale = ui.n_channel == 1  # anything else is assumed to be RGB

        if is_grayscale and ui.mode in ('pretrain', 'hybrid'):
            # Convert the grayscale images to 32 x 32 x 3.
            trainX = to_rgb_32(trainX)
            testX = to_rgb_32(testX)
        elif is_grayscale and ui.mode in ('train', 'load'):
            # Give grayscale images an explicit single channel: (28, 28, 1).
            # 'hybrid' is deliberately excluded here: its images were just
            # converted to 32 x 32 x 3 and reshaping them to (N, 28, 28, 1)
            # always raised ValueError.
            trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
            testX = testX.reshape((testX.shape[0], 28, 28, 1))

        if ui.mode != 'hybrid':
            # One-hot encode target values. Hybrid mode is left with raw
            # labels (per the original note, it applies to_categorical itself).
            trainY = to_categorical(trainY)
            testY = to_categorical(testY)

        return trainX, trainY, testX, testY
 def test_make_negative_edges_check_neg_nodes(self):
     unique_node_ids = list(np.unique(self.nodes.id))
     neg_nodes = list(
         np.unique(np.concatenate((self.ne.subject, self.ne.object))))
     self.assertTrue(
         set(neg_nodes) <= set(unique_node_ids),
         "Some nodes from negative edges are not in the nodes tsv file")
def load_vuln_from_request(request, tokenizer):
    """Tokenize the fields of a vulnerability request into one input row.

    Each field is prefixed with its marker token ('XXBOS XXAN', 'XXAC',
    'XXVN', 'XXVD', 'XXSV' ... 'XXEOS'), converted to an integer sequence
    with ``tokenizer.texts_to_sequences`` and post-padded to a fixed length
    (20 for app name, app context and vuln name; 800 for the description).

    :param request: object exposing ``appName``, ``appContext``, ``vulnName``,
        ``vulnDescription`` and ``severity`` string attributes.
    :param tokenizer: object exposing ``texts_to_sequences``.
    :return: array with the field sequences concatenated along axis 1.
    """
    app_name = 'XXBOS XXAN ' + request.appName
    app_context = 'XXAC ' + request.appContext
    vuln_name = 'XXVN ' + request.vulnName
    vuln_desc = 'XXVD ' + request.vulnDescription
    severity = 'XXSV ' + request.severity + ' XXEOS'

    # App name
    tokenized_app_name = tokenizer.texts_to_sequences([app_name])
    tokenized_app_name_padded = pad_sequences(
        tokenized_app_name, maxlen=20, padding='post')

    # App context. The original padded tokenized_app_name here, so the app
    # name was emitted twice and the context never reached the output.
    tokenized_app_context = tokenizer.texts_to_sequences([app_context])
    tokenized_app_context_padded = pad_sequences(
        tokenized_app_context, maxlen=20, padding='post')

    # Vulnerability name
    tokenized_vuln_name = tokenizer.texts_to_sequences([vuln_name])
    tokenized_vuln_name_padded = pad_sequences(
        tokenized_vuln_name, maxlen=20, padding='post')

    # Vulnerability description
    tokenized_vuln_desc = tokenizer.texts_to_sequences([vuln_desc])
    tokenized_vuln_desc_padded = pad_sequences(
        tokenized_vuln_desc, maxlen=800, padding='post')

    # Severity.
    # NOTE(review): this sequence is concatenated without padding, so the
    # output width depends on how many severity tokens the tokenizer emits —
    # confirm this is intended.
    tokenized_vuln_severity = tokenizer.texts_to_sequences([severity])

    tokenized_data_set = np.concatenate((tokenized_app_name_padded,
                                         tokenized_app_context_padded,
                                         tokenized_vuln_name_padded,
                                         tokenized_vuln_desc_padded,
                                         tokenized_vuln_severity),
                                        axis=1)
    return tokenized_data_set
Example #4
0
def converge_to_0_dvh(raw_dvh):
    """
    Append three zero-count bins to every DVH whose last count is non-zero.

    The appended bin centers are the last bin center plus 0, 1 and 2. DVHs
    that are empty or already end in 0 are passed through unchanged.

    :param raw_dvh: Dictionary produced by calc_dvhs(..) function.
    :return: Dictionary of bincenters and counts (x and y of DVH)
    """
    n_extra = 3
    converged = {}

    for roi, dvh in raw_dvh.items():
        ends_above_zero = len(dvh.counts) > 0 and dvh.counts[-1] != 0

        if ends_above_zero:
            # Extend the x axis, then pad the y axis with matching zeros.
            tail_centers = np.array(
                [dvh.bincenters[-1] + step for step in range(n_extra)])
            x_values = np.concatenate(
                (dvh.bincenters.flatten(), tail_centers))
            y_values = np.concatenate(
                (dvh.counts.flatten(), np.zeros(n_extra)))
        else:
            # Empty DVH, or one that already ends in 0: keep as is.
            x_values = dvh.bincenters
            y_values = dvh.counts

        converged[roi] = {'bincenters': x_values, 'counts': y_values}

    return converged
def dice(img1, img2, labels=None, nargout=1):
    '''
    Dice [1] volume overlap metric

    The default is to *not* return a measure for the background layer (label = 0)

    [1] Dice, Lee R. "Measures of the amount of ecologic association between species."
    Ecology 26.3 (1945): 297-302.

    Parameters
    ----------
    img1 : array-like. The first volume (e.g. predicted volume)
    img2 : array-like. The second volume (e.g. "true" volume)
    labels : optional vector of labels on which to compute Dice.
        If this is not provided, Dice is computed on all non-background (non-0) labels
    nargout : optional control of output arguments. if 1, output Dice measure(s).
        Any other value outputs the tuple (Dice, labels)

    Output
    ------
    if nargout == 1 : dice : vector of dice measures for each labels
    otherwise : (dice, labels) : where labels is a vector of the labels on which
        dice was computed
    '''
    # Accept plain lists/tuples as well as arrays: comparing a list with a
    # label yields a scalar False, which silently produced a score of 0.
    img1 = np.asarray(img1)
    img2 = np.asarray(img2)

    if labels is None:
        # Sorted 1-D array of every label present in either volume.
        labels = np.unique(np.concatenate((img1.ravel(), img2.ravel())))
        labels = labels[labels != 0]  # remove background

    dicem = np.zeros(len(labels))
    for idx, lab in enumerate(labels):
        in_first = img1 == lab
        in_second = img2 == lab
        top = 2 * np.sum(np.logical_and(in_first, in_second))
        bottom = np.sum(in_first) + np.sum(in_second)
        # Clamp to machine epsilon so a label absent from both volumes
        # scores 0 instead of dividing by zero.
        bottom = np.maximum(bottom, np.finfo(float).eps)
        dicem[idx] = top / bottom

    if nargout == 1:
        return dicem
    return (dicem, labels)
Example #6
0
    f1 = np.where(np.isnan(f1), np.zeros_like(f1), f1)

    return np.mean(f1)


# Compute Macro-F1 over the whole test set.
# Predicted and true class indices are collected per batch and joined once
# after the loop.
pred_batches = []
true_batches = []
for x, y in test_db:

    y_pred = model(x)
    y_pred = tf.argmax(y_pred, axis=1).numpy()
    pred_batches.append(y_pred)

    y_true = tf.argmax(y, axis=1).numpy()
    true_batches.append(y_true)

# Fall back to empty arrays when test_db yields no batches.
y_p = np.concatenate(pred_batches, axis=0) if pred_batches else np.array([])
y_t = np.concatenate(true_batches, axis=0) if true_batches else np.array([])

# Score the accumulated arrays. The original passed y_pred / y_true, i.e.
# only the last batch, leaving the accumulated y_p / y_t unused.
mf = f1(y_p, y_t)
print('F1 score:', mf)

# Inspect misclassified samples.
# As visible here, the loop only recomputes per-batch predictions and labels.
for x, y in test_db:
    y_pred = model(x)
    y_pred = tf.argmax(y_pred, axis=1).numpy()
    y_true = tf.argmax(y, axis=1).numpy()
Example #7
0
 def create_pr_graph(classified_data, sampling_algorithm):
     """Build a Qt canvas with precision-recall curves for two classification runs.

     :param classified_data: indexable with two entries, [0] for the standard
         run and [1] for the re-sampled run. Each entry provides 'trues_list'
         and 'preds_list' (sequences of arrays that are concatenated) and
         'average_precision' (shown in the legend).
     :param sampling_algorithm: not used by this function; kept for callers.
     :return: a FigureCanvasQTAgg of fixed height (350) wrapping the figure.
     """
     f, ax = plt.subplots()

     nml_y_true = np.concatenate(classified_data[0]['trues_list'])
     nml_probas = np.concatenate(classified_data[0]['preds_list'])
     resampled_y_true = np.concatenate(classified_data[1]['trues_list'])
     resampled_probas = np.concatenate(classified_data[1]['preds_list'])

     # Column 1 of the concatenated predictions is used as the score.
     # precision_recall_curve returns (precision, recall, thresholds).
     pr, re, _ = precision_recall_curve(nml_y_true, nml_probas[:, 1])
     resam_pr, resam_re, _ = precision_recall_curve(
         resampled_y_true, resampled_probas[:, 1])

     ax.get_figure().set_size_inches(5, 5)
     ax.spines['top'].set_visible(False)
     ax.spines['right'].set_visible(False)
     ax.set_title('PR chart')
     ax.set_xlabel('Recall')
     ax.set_ylabel('Precision')
     ax.set_ylim([0.0, 1.05])
     ax.set_xlim([0.0, 1.0])

     # Recall on x, precision on y for BOTH curves, matching the axis labels.
     # The standard curve was previously plotted with the axes swapped.
     ax.plot(re, pr, color='b', label="Standard case (AUC = {:.2f})".format(classified_data[0]['average_precision']))
     ax.plot(resam_re, resam_pr, color='g', label="Re-sampled case (AUC = {:.2f})".format(classified_data[1]['average_precision']))
     ax.legend(loc="upper right", prop={'size': 7})
     ax.xaxis.labelpad = -0.5

     canvas = FigureCanvasQTAgg(f)
     canvas.setMinimumHeight(350)
     canvas.setMaximumHeight(350)
     return canvas
# Model mapping gan_input to gan_output; it is trained below on latent vectors
# through gan.train_on_batch.
gan = keras.models.Model(gan_input, gan_output)
gan_optimizer = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=1e-8)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')

iterations = 10000
batch_size = 20
save_dir = 'output'
# Make sure the output directory exists; otherwise the first img.save() below
# fails when 'output' has not been created beforehand.
os.makedirs(save_dir, exist_ok=True)
start = 0  # index of the first real image of the current batch

for step in range(iterations):
    # Sample latent vectors and turn them into generated images.
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    generated_images = generator.predict(random_latent_vectors)

    # Take the next slice of real images and put it after the generated ones.
    stop = start + batch_size
    real_images = x_train[start:stop]
    combined_images = np.concatenate([generated_images, real_images])

    # Labels follow the same order: 1 for generated images, 0 for real ones.
    labels = np.concatenate(
        [np.ones((batch_size, 1)),
         np.zeros((batch_size, 1))])
    # Add a small amount of random noise to the labels.
    labels += 0.05 * np.random.random(labels.shape)

    d_loss = discriminator.train_on_batch(combined_images, labels)

    # Train the combined model on fresh latent vectors with all-zero targets,
    # i.e. the label value used above for real images.
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    misleading_targets = np.zeros((batch_size, 1))
    a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)

    # Advance through x_train, wrapping around once a full batch no longer fits.
    start += batch_size
    if start > len(x_train) - batch_size:
        start = 0

    # Every 100 steps: checkpoint the weights and save one generated image.
    if step % 100 == 0:
        gan.save_weights('gan.h5')
        img = image.array_to_img(generated_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'generated_frog' + str(step) + '.png'))
Example #9
0
def pre_processor(ui):
    """Load a train/test image dataset and prepare it for the mode set on ``ui``.

    Generic pre-processor which can handle both grayscale and RGB datasets.

    Attributes read from ``ui``:
        mode: 'ensemble' selects the ensemble path; every other value goes
            through ``process_dataset`` followed by ``process_pixels``, where
            'load', 'train', 'pretrain' and 'hybrid' get specific handling.
        n_channel: 1 for grayscale images; 3 for RGB.
        dataset: string passed to ``eval`` to obtain an object exposing
            ``load_data()`` (non-ensemble modes only).
        optimize: in ensemble mode with one channel, 'hybrid' selects the
            32 x 32 x 3 conversion instead of the (28, 28, 1) reshape.

    Returns:
        Tuple ``(trainX, trainY, testX, testY)``.

    Raises:
        ValueError: in ensemble mode when ``ui.n_channel`` is neither 1 nor 3
            (previously this surfaced as an UnboundLocalError at ``return``).
    """

    def gray_to_rgb_32(images):
        # Resize every image to 32 x 32 and expand gray to 3 channels, then
        # stack the results into one float32 array.
        converted = [
            cv2.cvtColor(cv2.resize(img, (32, 32)), cv2.COLOR_GRAY2BGR)
            for img in images
        ]
        return np.concatenate([arr[np.newaxis]
                               for arr in converted]).astype('float32')

    def add_gray_channel(images):
        # Reshape to (N, 28, 28, 1) so grayscale images carry one channel.
        return images.reshape((images.shape[0], 28, 28, 1))

    # load train and test datasets, reshape if necessary and one hot encode labels
    def process_dataset():
        # SECURITY: eval() executes arbitrary Python. ui.dataset must come from
        # a trusted source; consider replacing this with an explicit
        # name -> dataset-module mapping.
        str_to_dataset = eval(ui.dataset)

        (trainX, trainY), (testX, testY) = str_to_dataset.load_data()

        if ui.mode == 'load':  # loader mode: display a sample of the images
            # summarise loaded dataset
            print('Train: X=%s, y=%s' % (trainX.shape, trainY.shape))
            print('Test: X=%s, y=%s' % (testX.shape, testY.shape))

            for i in range(9):  # plot the first nine images on a 3 x 3 grid
                pyplot.subplot(330 + 1 + i)
                pyplot.imshow(trainX[i])
            pyplot.show()  # display the figure

        is_grayscale = ui.n_channel == 1  # anything else is assumed to be RGB

        if is_grayscale and ui.mode in ('pretrain', 'hybrid'):
            # Convert the grayscale images to 32 x 32 x 3.
            trainX = gray_to_rgb_32(trainX)
            testX = gray_to_rgb_32(testX)
        elif is_grayscale and ui.mode in ('train', 'load'):
            # 'hybrid' is deliberately excluded here: its images were just
            # converted to 32 x 32 x 3 and reshaping them to (N, 28, 28, 1)
            # always raised ValueError.
            trainX = add_gray_channel(trainX)
            testX = add_gray_channel(testX)

        if ui.mode != 'hybrid':
            # One-hot encode target values. Hybrid mode is left with raw
            # labels (per the original note, it applies to_categorical itself).
            trainY = to_categorical(trainY)
            testY = to_categorical(testY)

        return trainX, trainY, testX, testY

    # convert to floats and normalise
    def process_pixels(train, test):
        # integers -> floats, scaled to the range 0-1
        train_n = train.astype('float32') / 255.0
        test_n = test.astype('float32') / 255.0
        return train_n, test_n

    if ui.mode == 'ensemble':
        # Ensemble mode passes output from keras models to sci-kit linear
        # classifiers. On this path labels are not one-hot encoded and pixels
        # are not normalised.
        if ui.n_channel == 1:
            print("1 channel land !")
            (trainX, trainY), (testX, testY) = fashion_mnist.load_data()

            if ui.optimize == 'hybrid':
                # convert to 32 x 32 x 3 for pre trained ensemble networks
                trainX = gray_to_rgb_32(trainX)
                testX = gray_to_rgb_32(testX)
            else:
                trainX = add_gray_channel(trainX)
                testX = add_gray_channel(testX)

        elif ui.n_channel == 3:
            print("3 channel land !")
            (trainX, trainY), (testX, testY) = cifar10.load_data()
            # make the three-channel layout explicit: (N, 32, 32, 3)
            trainX = trainX.reshape((trainX.shape[0], 32, 32, 3))
            testX = testX.reshape((testX.shape[0], 32, 32, 3))

        else:
            raise ValueError(
                "ensemble mode supports n_channel 1 or 3, got %r"
                % (ui.n_channel,))

    else:  # has been tested on autokeras

        trainX, trainY, testX, testY = process_dataset()  # load dataset
        trainX, testX = process_pixels(trainX, testX)  # prepare pixel data

    return trainX, trainY, testX, testY