def test_make_negative_edges_check_neg_nodes(self):
    unique_node_ids = list(np.unique(self.nodes.id))
    neg_nodes = list(
        np.unique(np.concatenate((self.ne.subject, self.ne.object))))
    self.assertTrue(
        set(neg_nodes) <= set(unique_node_ids),
        "Some nodes from negative edges are not in the nodes tsv file")
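# A minimal, self-contained sketch of the same subset check performed by the test
# above; the toy `nodes` and `neg_edges` frames below are illustrative assumptions
# standing in for the real nodes/edges TSV data loaded by the test fixture.
import numpy as np
import pandas as pd

nodes = pd.DataFrame({'id': ['g1', 'g2', 'g3']})
neg_edges = pd.DataFrame({'subject': ['g1', 'g2'], 'object': ['g3', 'g1']})

neg_nodes = set(np.unique(np.concatenate((neg_edges.subject, neg_edges.object))))
assert neg_nodes <= set(nodes.id), \
    "Some nodes from negative edges are not in the nodes tsv file"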
def load_vuln_from_request(request, tokenizer):
    APP_NAME = 'XXBOS XXAN ' + request.appName
    APP_CONTEXT = 'XXAC ' + request.appContext
    VULN_NAME = 'XXVN ' + request.vulnName
    VULN_DESC = 'XXVD ' + request.vulnDescription
    SEVERITY = 'XXSV ' + request.severity + ' XXEOS'

    # tokenizing app_name
    tokenized_app_name = tokenizer.texts_to_sequences([APP_NAME])
    tokenized_app_name_padded = pad_sequences(tokenized_app_name,
                                              maxlen=20,
                                              padding='post')
    # tokenizing app_context (pad the context sequence, not the app name)
    tokenized_app_context = tokenizer.texts_to_sequences([APP_CONTEXT])
    tokenized_app_context_padded = pad_sequences(tokenized_app_context,
                                                 maxlen=20,
                                                 padding='post')
    # tokenizing vuln name
    tokenized_vuln_name = tokenizer.texts_to_sequences([VULN_NAME])
    tokenized_vuln_name_padded = pad_sequences(tokenized_vuln_name,
                                               maxlen=20,
                                               padding='post')
    # tokenizing vuln description
    tokenized_vuln_desc = tokenizer.texts_to_sequences([VULN_DESC])
    tokenized_vuln_desc_padded = pad_sequences(tokenized_vuln_desc,
                                               maxlen=800,
                                               padding='post')
    # tokenizing severity (short fixed-format field, left unpadded)
    tokenized_vuln_severity = tokenizer.texts_to_sequences([SEVERITY])

    tokenized_data_set = np.concatenate((tokenized_app_name_padded,
                                         tokenized_app_context_padded,
                                         tokenized_vuln_name_padded,
                                         tokenized_vuln_desc_padded,
                                         tokenized_vuln_severity),
                                        axis=1)
    return tokenized_data_set
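# Hedged usage sketch: `sample_request` is a stand-in object exposing the
# attribute names read above (appName, appContext, vulnName, vulnDescription,
# severity), and the tokenizer is assumed to be a Keras Tokenizer fitted on the
# project's training corpus. All names and literals below are illustrative
# assumptions, not part of the original code.
from types import SimpleNamespace
from tensorflow.keras.preprocessing.text import Tokenizer

sample_request = SimpleNamespace(
    appName='payments-api',
    appContext='internet facing',
    vulnName='sql injection',
    vulnDescription='user input concatenated into a query string',
    severity='high')

fitted_tokenizer = Tokenizer()
fitted_tokenizer.fit_on_texts(
    ['XXBOS XXAN XXAC XXVN XXVD XXSV XXEOS sql injection high'])

features = load_vuln_from_request(sample_request, fitted_tokenizer)
print(features.shape)  # (1, 20 + 20 + 20 + 800 + 3) = (1, 863) with this toy vocabulary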
def converge_to_0_dvh(raw_dvh):
    """
    :param raw_dvh: Dictionary produced by calc_dvhs(..) function.
    :return: Dictionary of bincenters and counts (x and y of DVH)
    """
    res = {}
    zeros = np.zeros(3)
    for roi in raw_dvh:
        res[roi] = {}
        dvh = raw_dvh[roi]

        # The last value of the DVH is not equal to 0: extend the curve with
        # three extra bins so it converges to 0.
        if len(dvh.counts) > 0 and dvh.counts[-1] != 0:
            tmp_bincenters = []
            for i in range(3):
                tmp_bincenters.append(dvh.bincenters[-1] + i)
            tmp_bincenters = np.array(tmp_bincenters)
            tmp_bincenters = np.concatenate(
                (dvh.bincenters.flatten(), tmp_bincenters))
            bincenters = np.array(tmp_bincenters)
            counts = np.concatenate(
                (dvh.counts.flatten(), np.array(zeros)))
        # The last value of the DVH is already 0 (or the DVH is empty)
        else:
            bincenters = dvh.bincenters
            counts = dvh.counts

        res[roi]['bincenters'] = bincenters
        res[roi]['counts'] = counts

    return res
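# Minimal usage sketch with a mocked DVH-like object; the real input comes from
# calc_dvhs(..), so the SimpleNamespace below (with `counts` and `bincenters`
# arrays) is only an illustrative assumption about that interface.
from types import SimpleNamespace
import numpy as np

mock_dvh = SimpleNamespace(counts=np.array([100., 60., 10.]),
                           bincenters=np.array([0.5, 1.5, 2.5]))
padded = converge_to_0_dvh({'PTV': mock_dvh})
print(padded['PTV']['counts'])      # original counts followed by three zeros
print(padded['PTV']['bincenters'])  # original bin centers extended by three bins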
def dice(img1, img2, labels=None, nargout=1):
    '''
    Dice [1] volume overlap metric

    The default is to *not* return a measure for the background layer (label = 0)

    [1] Dice, Lee R. "Measures of the amount of ecologic association between species."
    Ecology 26.3 (1945): 297-302.

    Parameters
    ----------
    img1 : nd array. The first volume (e.g. predicted volume)
    img2 : nd array. The second volume (e.g. "true" volume)
    labels : optional vector of labels on which to compute Dice.
        If this is not provided, Dice is computed on all non-background (non-0) labels
    nargout : optional control of output arguments. if 1, output Dice measure(s).
        if 2, output tuple of (Dice, labels)

    Output
    ------
    if nargout == 1 : dice : vector of dice measures for each label
    if nargout == 2 : (dice, labels) : where labels is a vector of the labels on
        which dice was computed
    '''
    if labels is None:
        labels = np.unique(np.concatenate((img1, img2)))  # sorted 1-D array of labels
        labels = np.delete(labels, np.where(labels == 0))  # remove background

    dicem = np.zeros(len(labels))
    for idx, lab in enumerate(labels):
        top = 2 * np.sum(np.logical_and(img1 == lab, img2 == lab))
        bottom = np.sum(img1 == lab) + np.sum(img2 == lab)
        bottom = np.maximum(bottom, np.finfo(float).eps)  # epsilon guards against division by zero
        dicem[idx] = top / bottom

    if nargout == 1:
        return dicem
    else:
        return (dicem, labels)
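# Quick sanity check for dice(): two tiny label maps that share one pixel of
# label 1 out of 2 + 1 labelled pixels should give 2*1/(2+1) ~= 0.67 for that label.
import numpy as np

a = np.array([[1, 1], [0, 0]])
b = np.array([[1, 0], [0, 0]])
print(dice(a, b))             # [0.6667] for label 1 (background excluded)
print(dice(a, b, nargout=2))  # (scores, labels)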
    f1 = np.where(np.isnan(f1), np.zeros_like(f1), f1)
    return np.mean(f1)


# compute the macro-averaged F1 score over the whole test set
y_t = np.array([])
y_p = np.array([])
for x, y in test_db:
    y_pred = model(x)
    y_pred = tf.argmax(y_pred, axis=1).numpy()
    if y_p.size == 0:
        y_p = y_pred
    else:
        y_p = np.concatenate((y_p, y_pred), axis=0)
    y_true = tf.argmax(y, axis=1).numpy()
    if y_t.size == 0:
        y_t = y_true
    else:
        y_t = np.concatenate((y_t, y_true), axis=0)
mf = f1(y_p, y_t)  # score the accumulated predictions, not just the last batch
print('F1 score:', mf)

# inspect the samples the model gets wrong
for x, y in test_db:
    y_pred = model(x)
    y_pred = tf.argmax(y_pred, axis=1).numpy()
    y_true = tf.argmax(y, axis=1).numpy()
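# Optional cross-check (an assumption, not part of the original script): the
# macro-averaged F1 accumulated in y_p / y_t above can be compared against
# scikit-learn's implementation; the two values should roughly agree.
from sklearn.metrics import f1_score
print('sklearn macro-F1:', f1_score(y_t, y_p, average='macro'))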
def create_pr_graph(classified_data, sampling_algorithm):
    f, ax = plt.subplots()

    # flatten the per-fold lists into single arrays for both cases
    nml_y_true = np.concatenate(classified_data[0]['trues_list'])
    nml_probas = np.concatenate(classified_data[0]['preds_list'])
    resampled_y_true = np.concatenate(classified_data[1]['trues_list'])
    resampled_probas = np.concatenate(classified_data[1]['preds_list'])

    pr, re, _ = precision_recall_curve(nml_y_true, nml_probas[:, 1])
    resam_pr, resam_re, _ = precision_recall_curve(resampled_y_true,
                                                   resampled_probas[:, 1])

    ax.get_figure().set_size_inches(5, 5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title('PR chart')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.0])
    # plot recall on the x-axis and precision on the y-axis for both cases
    ax.plot(re, pr, color='b',
            label="Standard case (AUC = {:.2f})".format(
                classified_data[0]['average_precision']))
    ax.plot(resam_re, resam_pr, color='g',
            label="Re-sampled case (AUC = {:.2f})".format(
                classified_data[1]['average_precision']))
    ax.legend(loc="upper right", prop={'size': 7})
    ax.xaxis.labelpad = -0.5

    canvas = FigureCanvasQTAgg(f)
    canvas.setMinimumHeight(350)
    canvas.setMaximumHeight(350)
    return canvas
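# Hedged sketch of the input contract assumed by create_pr_graph(): index 0 holds
# the standard classification results and index 1 the re-sampled ones, each with
# per-fold lists of true labels and two-column probability arrays plus an average
# precision value. The literals below are illustrative assumptions only.
import numpy as np

example_classified_data = [
    {'trues_list': [np.array([0, 1, 1])],
     'preds_list': [np.array([[0.8, 0.2], [0.3, 0.7], [0.4, 0.6]])],
     'average_precision': 0.83},
    {'trues_list': [np.array([0, 1, 0])],
     'preds_list': [np.array([[0.6, 0.4], [0.2, 0.8], [0.7, 0.3]])],
     'average_precision': 0.79},
]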
# assemble the combined GAN (generator followed by the frozen discriminator) and compile it
gan = keras.models.Model(gan_input, gan_output)
gan_optimizer = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=1e-8)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')

iterations = 10000
batch_size = 20
save_dir = 'output'

start = 0
for step in range(iterations):
    # sample points in the latent space and decode them into fake images
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    generated_images = generator.predict(random_latent_vectors)

    # mix fake and real images and train the discriminator on the combined batch
    stop = start + batch_size
    real_images = x_train[start:stop]
    combined_images = np.concatenate([generated_images, real_images])
    labels = np.concatenate(
        [np.ones((batch_size, 1)), np.zeros((batch_size, 1))])
    labels += 0.05 * np.random.random(labels.shape)  # add noise to the labels
    d_loss = discriminator.train_on_batch(combined_images, labels)

    # train the generator via the gan model (discriminator frozen) with
    # misleading targets that claim the generated images are real
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    misleading_targets = np.zeros((batch_size, 1))
    a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)

    start += batch_size
    if start > len(x_train) - batch_size:
        start = 0

    if step % 100 == 0:
        gan.save_weights('gan.h5')
        img = image.array_to_img(generated_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'generated_frog' + str(step) + '.png'))
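# The training loop above assumes `generator`, `discriminator`, `gan_input`,
# `gan_output`, `latent_dim` and `x_train` were defined earlier. A minimal sketch
# of plausible definitions follows (an assumption mirroring a standard DCGAN setup
# for 32 x 32 x 3 CIFAR-10 frog images, not necessarily the original architecture).
import numpy as np
import keras
from keras import layers

latent_dim = 32

# generator: latent vector -> 32 x 32 x 3 image
generator_input = keras.Input(shape=(latent_dim,))
x = layers.Dense(128 * 16 * 16)(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((16, 16, 128))(x)
x = layers.Conv2DTranspose(128, 4, strides=2, padding='same')(x)  # upsample to 32 x 32
x = layers.LeakyReLU()(x)
x = layers.Conv2D(3, 7, activation='tanh', padding='same')(x)
generator = keras.models.Model(generator_input, x)

# discriminator: image -> real/fake probability
discriminator_input = keras.Input(shape=(32, 32, 3))
x = layers.Conv2D(128, 3)(discriminator_input)
x = layers.LeakyReLU()(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(1, activation='sigmoid')(x)
discriminator = keras.models.Model(discriminator_input, x)
discriminator.compile(
    optimizer=keras.optimizers.RMSprop(lr=0.0008, clipvalue=1.0, decay=1e-8),
    loss='binary_crossentropy')

# freeze the discriminator inside the combined GAN
discriminator.trainable = False
gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))

# training data: the CIFAR-10 frog class (class 6), normalised to [0, 1]
(x_train_all, y_train_all), (_, _) = keras.datasets.cifar10.load_data()
x_train = x_train_all[y_train_all.flatten() == 6].astype('float32') / 255.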
def pre_processor(ui):
    # Generic pre-processor which can handle both grayscale and RGB datasets:
    # load the train and test datasets, reshape if necessary and one hot encode the labels.

    def process_dataset():
        # The eval function takes a string as argument and evaluates this string as
        # a Python expression; the result here is the dataset module named by ui.dataset.
        str_to_dataset = eval(ui.dataset)
        (trainX, trainY), (testX, testY) = str_to_dataset.load_data()  # load dataset

        if ui.mode == 'load':
            # when in loader mode, display a sample of the images.
            # summarise loaded dataset
            print('Train: X=%s, y=%s' % (trainX.shape, trainY.shape))
            print('Test: X=%s, y=%s' % (testX.shape, testY.shape))
            for i in range(9):  # plot first few images
                # define subplot
                pyplot.subplot(330 + 1 + i)
                pyplot.imshow(trainX[i])
            pyplot.show()  # display the figure

        if ui.mode == 'pretrain' or ui.mode == 'hybrid':
            if ui.n_channel == 1:
                # convert images to 32 x 32 x 3 (RGB) for the pre-trained networks
                trainX = [
                    cv2.cvtColor(cv2.resize(i, (32, 32)), cv2.COLOR_GRAY2BGR)
                    for i in trainX
                ]
                trainX = np.concatenate([arr[np.newaxis]
                                         for arr in trainX]).astype('float32')
                testX = [
                    cv2.cvtColor(cv2.resize(i, (32, 32)), cv2.COLOR_GRAY2BGR)
                    for i in testX
                ]
                testX = np.concatenate([arr[np.newaxis]
                                        for arr in testX]).astype('float32')

        if ui.mode == 'train' or ui.mode == 'load' or ui.mode == 'hybrid':
            # check if images are grayscale. If not, then assume images are RGB
            if ui.n_channel == 1:
                # grayscale images => reshape dataset to have just one channel,
                # i.e. a shape of (28, 28, 1)
                trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
                testX = testX.reshape((testX.shape[0], 28, 28, 1))

        if ui.mode != 'hybrid':
            # one hot encode target values;
            # hybrid mode handles to_categorical in the main hybrid function
            trainY = to_categorical(trainY)
            testY = to_categorical(testY)

        return trainX, trainY, testX, testY

    # convert to floats and normalise
    def process_pixels(train, test):
        # integers -> floats
        train_n = train.astype('float32')
        test_n = test.astype('float32')
        # normalize to range 0-1
        train_n = train_n / 255.0
        test_n = test_n / 255.0
        return train_n, test_n  # return normalized images

    if ui.mode == 'ensemble':
        # ensemble mode involves passing output from keras models to sci-kit linear classifiers
        if ui.n_channel == 1:
            print("1 channel land !")
            # load the training and testing data
            (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
            if ui.optimize == 'hybrid':
                # convert to 32 x 32 x 3 for pre-trained ensemble networks
                trainX = [
                    cv2.cvtColor(cv2.resize(i, (32, 32)), cv2.COLOR_GRAY2BGR)
                    for i in trainX
                ]
                trainX = np.concatenate([arr[np.newaxis]
                                         for arr in trainX]).astype('float32')
                testX = [
                    cv2.cvtColor(cv2.resize(i, (32, 32)), cv2.COLOR_GRAY2BGR)
                    for i in testX
                ]
                testX = np.concatenate([arr[np.newaxis]
                                        for arr in testX]).astype('float32')
            else:
                # reshape dataset to have a single channel
                trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
                testX = testX.reshape((testX.shape[0], 28, 28, 1))
        if ui.n_channel == 3:
            print("3 channel land !")
            # load the training and testing data
            (trainX, trainY), (testX, testY) = cifar10.load_data()
            # reshape dataset to 32 x 32 x 3
            trainX = trainX.reshape((trainX.shape[0], 32, 32, 3))
            testX = testX.reshape((testX.shape[0], 32, 32, 3))
    else:  # has been tested on autokeras
        trainX, trainY, testX, testY = process_dataset()  # load dataset
        trainX, testX = process_pixels(trainX, testX)  # prepare pixel data

    return trainX, trainY, testX, testY
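# Hedged usage sketch: `ui` is whatever configuration object the surrounding
# application passes in; the SimpleNamespace below only mimics the attributes read
# above (dataset, mode, n_channel, optimize) and is an assumption. It also assumes
# fashion_mnist is imported in this module, since eval(ui.dataset) resolves the name there.
from types import SimpleNamespace

ui_cfg = SimpleNamespace(dataset='fashion_mnist', mode='train', n_channel=1, optimize=None)
trainX, trainY, testX, testY = pre_processor(ui_cfg)
print(trainX.shape, trainY.shape)  # expected (60000, 28, 28, 1) (60000, 10) for fashion_mnist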