def load_knifey_transfer_values():
    """Load the Knifey-Spoony dataset together with cached Inception
    transfer-values for its training and test images.

    Returns a tuple:
    (x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
     cls_names, x_train_transfer_values, x_test_transfer_values)
    """
    model = load_inception_model()

    # Unpack the raw dataset.
    (x_train, y_train_cls, y_train,
     x_test, y_test_cls, y_test, cls_names) = load_knifey()

    # Cache files for the pre-computed transfer-values.
    data_dir = "data/knifey-spoony/"
    cache_train = os.path.join(data_dir, 'inception-knifey-train.pkl')
    cache_test = os.path.join(data_dir, 'inception-knifey-test.pkl')

    print("Processing Inception transfer-values for training-images ...")
    # Reload cached transfer-values when present; otherwise compute and save.
    x_train_transfer_values = transfer_values_cache(cache_path=cache_train,
                                                    image_paths=x_train,
                                                    model=model)

    print("Processing Inception transfer-values for test-images ...")
    # Same cache-or-compute logic for the test images.
    x_test_transfer_values = transfer_values_cache(cache_path=cache_test,
                                                   image_paths=x_test,
                                                   model=model)

    return (x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
            cls_names, x_train_transfer_values, x_test_transfer_values)
def writeImageValues():
    """Pre-compute and cache Inception transfer-values for every image in
    the training, validation, and test sets.

    transfer_values_cache() is called purely for its side effect of writing
    each image's transfer-values into its per-image cache file (located via
    getFileName); the returned arrays are discarded.

    Fixes over the previous version: the one-hot label encodings and the
    np.array(...) wrappers were computed and immediately discarded (dead
    code), and the same caching loop was written out three times.

    NOTE(review): relies on module-level generators (trainYielder,
    valYielder, testYielder), the Inception `model`, and the helpers
    getFileName / transfer_values_cache being defined elsewhere.
    """
    print("Processing Inception transfer-values for training-images ...")

    def _cache_batch(images, names, is_train):
        # Cache transfer-values for one batch, one image at a time.
        for i in range(images.shape[0]):
            transfer_values_cache(cache_path=getFileName(is_train, names[i]),
                                  images=np.expand_dims(images[i], axis=0),
                                  model=model)

    # Training images.
    batch_count = 0
    while True:
        train_x, _, _, num_batches, train_names = next(trainYielder)
        _cache_batch(train_x, train_names, True)
        batch_count += 1
        if batch_count == num_batches:
            break

    # Validation images.
    batch_count = 0
    while True:
        val_x, _, _, num_batches, val_names = next(valYielder)
        _cache_batch(val_x, val_names, False)
        batch_count += 1
        if batch_count == num_batches:
            break

    # Test images (the test yielder has no labels, hence 4-way unpack).
    batch_count = 0
    while True:
        test_x, _, num_batches, test_names = next(testYielder)
        _cache_batch(test_x, test_names, False)
        batch_count += 1
        if batch_count == num_batches:
            print("done saving data")
            break
    return
def predictTest(inceptionModel, testDataProvider):
    """Run the classifier over the whole test set.

    Returns (imageNames, finalList): the names of all test images and the
    predicted class for each one, in batch order.
    """
    batch_idx = 0
    predictions = []
    names_seen = []
    while True:
        valX, valXGray, n_batches, batch_names = next(testDataProvider)

        # Build the Inception feature matrix for this batch, one row per
        # image, from the per-image transfer-value caches.
        feature_rows = []
        for i in range(valX.shape[0]):
            row = np.array(
                transfer_values_cache(
                    cache_path=getFileName(False, batch_names[i]),
                    images=np.expand_dims(valX[i], axis=0),
                    model=inceptionModel))
            feature_rows.append(row)
        valFeatures = np.vstack(feature_rows)

        feed = {
            processedImages: valXGray,
            inceptionFeatureVec: valFeatures
        }
        predicted = session.run(y_pred_cls, feed)
        predictions.extend(predicted.tolist())
        names_seen.extend(batch_names.tolist())

        batch_idx += 1
        if batch_idx == n_batches:
            break
    return names_seen, predictions
def predictValidation(inceptionModel, valYielder):
    """Return the mean accuracy (in percent) over the validation set,
    averaged across all validation batches."""
    batch_idx = 0
    per_batch_acc = []
    while True:
        valX, valXGray, valY, n_batches, batch_names = next(valYielder)
        y_valid_batch = toOneHotEncodingVectorForm(valY.flatten(), 0, 19)

        # Assemble the Inception features for this batch, one row per image.
        feature_rows = []
        for i in range(valX.shape[0]):
            feature_rows.append(np.array(
                transfer_values_cache(
                    cache_path=getFileName(False, batch_names[i]),
                    images=np.expand_dims(valX[i], axis=0),
                    model=inceptionModel)))
        valFeatures = np.vstack(feature_rows)

        feed = {
            processedImages: valXGray,
            inceptionFeatureVec: valFeatures,
            y_true: y_valid_batch
        }
        per_batch_acc.append(session.run(accuracy, feed_dict=feed) * 100)

        batch_idx += 1
        if batch_idx == n_batches:
            break
    return sum(per_batch_acc) * 1.00 / len(per_batch_acc)
def get_features(images):
    """Return Inception transfer-values for a batch of images.

    Inception expects pixel values in [0, 255]; if the batch appears to be
    normalised to [0, 1] (judged from the first image's maximum), it is
    rescaled before feature extraction.
    """
    if np.max(images[0]) <= 1.0:
        images = images * 255.0
    return transfer_values_cache(images=images, model=model)
def load_or_cache_transfer_data(self, images, file_path):
    """Convert raw images into transfer-values, caching them at file_path.

    A ".npy" suffix is appended to file_path when it is missing.
    """
    if not file_path.endswith(".npy"):
        file_path += ".npy"
    return transfer_values_cache(cache_path=file_path,
                                 images=images,
                                 model=self.backend_model)
def cache_images_dir(dir, max_n=1000, replace=False):
    """Compute (or load cached) Inception transfer-values for the images
    under `dir`/pics.

    Parameters:
        dir: dataset directory; images live in dir/pics/<class>/<name> and
            features are cached at dir/images_features.pkl.
        max_n: process at most this many images.
        replace: when True, delete any existing cache first so the
            transfer-values are recomputed.

    Returns (transfer_values, ids) where ids[i] is the file stem of image i.
    """
    # Inception
    inception.maybe_download()
    model = inception.Inception()

    # Feature-cache location.
    file_path_cache = os.path.join(dir, 'images_features.pkl')
    # BUG FIX: only remove the cache if it actually exists; previously this
    # raised FileNotFoundError on a fresh directory when replace=True.
    if replace and os.path.exists(file_path_cache):
        os.remove(file_path_cache)

    print("Processing Inception transfer-values...")
    dir_im = dir + '/pics'
    n_total_images = sum(len(files) for _, _, files in os.walk(dir_im))
    n_total_images = min(max_n, n_total_images)
    print('Fetching %d images in %s ...' % (n_total_images, dir_im))

    images = np.zeros((n_total_images, 192, 256, 3), dtype=np.float32)
    image_ids = []  # renamed from `id` to avoid shadowing the builtin
    index = 0
    n_err = 0
    for d in os.listdir(dir_im):
        if index >= max_n:
            break
        d = dir_im + '/' + d
        for image_name in os.listdir(d):
            if index >= max_n:
                break
            image_path = d + '/' + image_name
            try:
                # Keep only the first 3 channels (drops any alpha channel).
                image_data = (misc.imread(image_path)[:, :, :3]).astype(
                    np.float32)
                images[index, :, :, :] = image_data  # (n, h, w, channels)
                image_ids.append(os.path.splitext(image_name)[0])
                index += 1
            except OSError as err:
                print(err)
                n_err += 1
    # Drop the rows that were reserved for images that failed to load.
    if n_err > 0:
        images = np.delete(images,
                           range(n_total_images - n_err, n_total_images), 0)
    image_ids = np.array(image_ids)

    transfer_values = inception.transfer_values_cache(
        cache_path=file_path_cache, images=images, model=model)
    return transfer_values, image_ids
def load_cifar10_transfer_values():
    """Load CIFAR-10 together with cached Inception transfer-values for its
    training and test images.

    Returns a tuple:
    (x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
     cls_names, x_train_transfer_values, x_test_transfer_values)
    """
    model = load_inception_model()

    # Unpack the raw dataset.
    (x_train, y_train_cls, y_train,
     x_test, y_test_cls, y_test, cls_names) = load_cifar10()

    # Cache files for the pre-computed transfer-values.
    data_dir = "data/CIFAR-10/"
    cache_train = os.path.join(data_dir, 'inception_cifar10_train.pkl')
    cache_test = os.path.join(data_dir, 'inception_cifar10_test.pkl')

    print("Processing Inception transfer-values for training-images ...")
    # Inception needs pixels in [0, 255]; the CIFAR-10 loaders return
    # pixels in [0.0, 1.0], so scale before feature extraction.
    x_train_transfer_values = transfer_values_cache(
        cache_path=cache_train, images=x_train * 255.0, model=model)

    print("Processing Inception transfer-values for test-images ...")
    # Same scaling for the test images.
    x_test_transfer_values = transfer_values_cache(
        cache_path=cache_test, images=x_test * 255.0, model=model)

    return (x_train, y_train_cls, y_train, x_test, y_test_cls, y_test,
            cls_names, x_train_transfer_values, x_test_transfer_values)
#images = images_test[0:9] #cls_true = cls_test[0:9] #plot_images(images, cls_true) '''下载inception model''' inception.maybe_download() model = inception.Inception() '''训练和测试的cache的路径''' file_path_cache_train = os.path.join(cifar10.data_path, 'inception_cifar10_train.pkl') file_path_cache_test = os.path.join(cifar10.data_path, 'inception_cifar10_test.pkl') print('处理训练集上的transfer-values.......... ') image_scaled = images_train * 255.0 # cifar-10的pixel是0-1的, shape=(50000, 32, 32, 3) transfer_values_train = transfer_values_cache( cache_path=file_path_cache_train, images=image_scaled, model=model) # shape=(50000, 2048) print('处理测试集上的transfer-values.......... ') images_scaled = images_test * 255.0 transfer_values_test = transfer_values_cache(cache_path=file_path_cache_test, model=model, images=images_scaled) print("transfer_values_train: ", transfer_values_train.shape) print("transfer_values_test: ", transfer_values_test.shape) '''显示transfer values''' def plot_transfer_values(i): print("输入图片:") plt.imshow(images_test[i], interpolation='nearest') plt.show()
# Cache paths for the CIFAR-10 transfer-values.
file_path_train = os.path.join(cifar10.data_path, 'inception_cifar10_train.pkl')
file_path_test = os.path.join(cifar10.data_path, 'inception_cifar10_test.pkl')

print(
    "Processing Inception transfer-values for the training images of Cifar-10 ..."
)
# First we need to scale the imgs to fit the Inception model requirements as
# it requires all pixels to be from 0 to 255, while our training examples of
# the CIFAR-10 pixels are between 0.0 and 1.0.
imgs_scaled = training_images * 255.0
# Checking if the transfer-values for our training images are already
# calculated and loading them; if not, calculate and save them.
transfer_values_training = transfer_values_cache(cache_path=file_path_train,
                                                 images=imgs_scaled,
                                                 model=inception_model)
print(
    "Processing Inception transfer-values for the testing images of Cifar-10 ..."
)
# Same pixel scaling for the test images.
imgs_scaled = testing_images * 255.0
# Same cache-or-compute logic for the test transfer-values.
transfer_values_testing = transfer_values_cache(cache_path=file_path_test,
                                                images=imgs_scaled,
                                                model=inception_model)
# plot_images(images=images, cls_true=label_list, smooth=False) plot_images(images=images, cls_true=cls_true, smooth=False) #============================================================================== inception.maybe_download() model = inception.Inception() from inception import transfer_values_cache file_path_cache_train = os.path.join(os.getcwd(), 'inception_tiny_train.pkl') file_path_cache_test = os.path.join(os.getcwd(), 'inception_tiny_test.pkl') images = train_data[3000:6000, :, :, :] cls_true = train_label[3000:6000] transfer_values_train = transfer_values_cache(cache_path=file_path_cache_train, images=images, model=model) def plot_scatter(values, cls): # Create a color-map with a different color for each class. import matplotlib.cm as cm cmap = cm.rainbow(np.linspace(0.0, 1.0, num_classes)) # Get the color for each sample. colors = cmap[cls] # Extract the x- and y-values. x = values[:, 0] y = values[:, 1]
# 绘制图像查看输出 # images = load_images(image_paths=image_paths_test[0:9]) # cls_true = cls_test[0:9] # plot_images(images=images, cls_true=cls_true, smooth=True) # 导入inception model = inception.Inception() # 设置缓存目录并预处理 file_path_train = os.path.join(data_dir, 'inception-knifey-train.pkl') file_path_test = os.path.join(data_dir, 'inception-knifey-test.pkl') print("Processing Inception transfer-values for training-images ...") transfer_values_train = transfer_values_cache(cache_path=file_path_train, image_paths=image_paths_train, model=model) print("processing Inception transfer-values for test-images ...") transfer_values_test = transfer_values_cache(cache_path=file_path_test, image_paths=image_paths_test, model=model) print(transfer_values_train.shape) # (4170, 2048) print(transfer_values_test.shape) # (530, 2048) def plot_transfer_values(i): print("Input image:") # Plot the i'th image from the test-set. image = imread(image_paths_test[i])
train_batch_size = 64 ### DOWNLOAD INCEPTION MODEL ### inception.maybe_download() ### LOAD INCEPTION MODEL ### model = inception.Inception() ### CALCULATE TRANSFER-VALUES ### from inception import transfer_values_cache kaggle_file_path_cache_train = os.path.join(config.TRANSFER_VALUES_DIR, 'inception_kaggle_train.pkl') kaggle_transfer_values_train = transfer_values_cache( cache_path=kaggle_file_path_cache_train, images=None, model=model) ### NEW CLASSIFIER IN TENSORFLOW ### # Placeholder Variables transfer_len = model.transfer_len x = tf.placeholder(tf.float32, shape=[None, transfer_len], name='x') y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true') y_true_cls = tf.argmax(y_true, dimension=1) # Neural Network x_pretty = pt.wrap(x) # Wrap the transfer-values as a Pretty Tensor object. with pt.defaults_scope(activation_fn=tf.nn.relu): y_pred, loss = x_pretty.fully_connected( size=1024, name='layer_fc1').softmax_classifier(num_classes=num_classes,
def main():
    """Run each realm's train/test time-series data through the Inception
    CNN and save the resulting feature vectors for the downstream DNN step.

    NOTE: input objects MUST HAVE BEEN THROUGH JSON_TO_DATASCIENCE_FORMAT.py
    FIRST.
    """
    # Realms to be processed through the CNN.
    # The well-named realms we've determined were this list.
    REALMS = ['dataminesjc', 'rubicon-fmap', 'gamut-prod', 'rubicon-fbmp',
              'nbcuni-superstore', 'jumpshot-jsc', 'rubicon-fbmq',
              'rally-health-integration', 'prudential-nj-exp2',
              'rubicon-fmaq']
    #REALMS = ['gamut-prod']

    # Iterates through all realms
    for REALM in REALMS:
        # Metrics that we have defined
        _METRICS = ['vmsram', 'tasks', 't_rscthnetno', 't_rscthhfsrb',
                    'c_ucpupct']
        _NUM_METRICS = len(_METRICS)

        # Load the training/test data as well as their respective lengths
        training_data = _load_obj(REALM + '^training_data')
        training_data['actual_lengths'] = _load_obj(REALM + '^training_length')
        test_data = _load_obj(REALM + '^test_data')
        test_data['actual_lengths'] = _load_obj(REALM + '^test_length')

        # Load the labels
        labels = _load_obj(REALM + '^labels')
        _NUM_LABELS = len(labels)

        # Create the phi versions of data
        phi_training_data, phi_test_data = _create_phi_data(training_data,
                                                            test_data)
        # Release the raw data as soon as the phi versions exist,
        # to keep peak memory down.
        training_data = None
        test_data = None

        # The number of data points in training and test so we can iterate
        # over them later
        _NUM_TRAINING_DATA = (phi_training_data[_METRICS[0]]).shape[0]
        _NUM_TEST_DATA = (phi_test_data[_METRICS[0]]).shape[0]

        print "Loaded Data..."

        # Load the inception model (CNN)
        model = inception.Inception()

        # ***** Image composition, Training, and Testing *****
        # Matrices where element i contains the max length of the ith
        # applications multiple metric time series
        max_dim_train, max_dim_test = _compose_dimension_matrices(
            phi_training_data, phi_test_data)

        # List of all CNN outputs to every image in the test dataset
        test_vectors = []

        # Populate that list by composing temporary dictionaries and passing
        # them through create_image and running it through the CNN.
        # Iterate through test data
        for row in range(_NUM_TEST_DATA):
            # temporary dictionary to be passed to create_image
            temp_dict = {'actual_lengths': {}}
            # Populates the dictionary with the time series for one app and
            # the longest of that apps time series
            for metric in _METRICS:
                temp_dict[metric] = phi_test_data[metric][row]
                temp_dict['actual_lengths'][metric] = \
                    phi_test_data['actual_lengths'][metric][row]
            input_dict = [temp_dict]
            largest_dim = max_dim_test[row]
            # Creates the image
            image = _create_image(input_dict, largest_dim)
            # Feeds image to CNN And stores the output in an output_vector
            output_vector = transfer_values_cache(images=image, model=model)
            # Add it to the list of all the other test vectors
            test_vectors.append(output_vector)

        # empties the already vectorized data to conserve memory
        for metric in _METRICS:
            phi_test_data[metric] = None

        # Saves the objects relevant to the DNN next step for test
        _save_obj(test_vectors, REALM + '^cnn_test_output')
        test_vectors = []
        _save_obj(phi_test_data['labels'], REALM + '^test_labels')
        phi_test_data['labels'] = None
        _save_obj(phi_test_data['hot_labels'], REALM + '^test_hot_labels')
        phi_test_data = None

        # All the same stuff as above, but for training data
        train_vectors = []
        for row in range(_NUM_TRAINING_DATA):
            temp_dict = {'actual_lengths': {}}
            for metric in _METRICS:
                temp_dict[metric] = phi_training_data[metric][row]
                temp_dict['actual_lengths'][metric] = \
                    phi_training_data['actual_lengths'][metric][row]
            input_dict = [temp_dict]
            largest_dim = max_dim_train[row]
            image = _create_image(input_dict, largest_dim)
            output_vector = transfer_values_cache(images=image, model=model)
            train_vectors.append(output_vector)

        for metric in _METRICS:
            phi_training_data[metric] = None

        _save_obj(train_vectors, REALM + '^cnn_training_output')
        train_vectors = []
        _save_obj(phi_training_data['labels'], REALM + '^training_labels')
        phi_training_data['labels'] = None
        _save_obj(phi_training_data['hot_labels'],
                  REALM + '^training_hot_labels')
        phi_training_data = None
def main(): # Realms to be processed through the CNN # NOTE: THESE MUST HAVE BEEN THROUGH JSON_TO_DATASCIENCE_FORMAT.py FIRST! # The well-named realms we've determined were this list #REALMS = ['dataminesjc', 'nbcuni-centralperk2', 'rubicon-fmap', 'gamut-prod', 'rubicon-fbmp', 'nbcuni-superstore', 'jumpshot-jsc', 'rubicon-fbmq', 'rally-health-integration', 'prudential-nj-exp2', 'rubicon-fmaq'] REALMS = ['gamut-prod'] # Iterates through all realms for REALM in REALMS: # Metrics that we have defined _NUM_METRICS = len(_METRICS) # Load the training/test data as well as their respective lengths training_data = _load_obj(REALM + '^training_data') training_data['actual_lengths'] = _load_obj(REALM + '^training_length') test_data = _load_obj(REALM + '^test_data') test_data['actual_lengths'] = _load_obj(REALM + '^test_length') print "Loaded data" # Load the labels labels = _load_obj(REALM + '^labels') _NUM_LABELS = len(labels) # The number of data points in training and test so we can iterate over them later max_dim_train, max_dim_test = _compose_dimension_matrices(training_data, test_data) new_training_data = [] for row, longest_metric_length in enumerate(max_dim_train): concatted = np.array([]) print "row, longest_metric_length: ", row, longest_metric_length for metric in _METRICS: filler = np.zeros(longest_metric_length) actual_length = training_data['actual_lengths'][metric][row] print "actual_length: ", actual_length filler[0:actual_length] = training_data[metric][row][0:actual_length] concatted = np.concatenate((concatted, filler), axis=0) new_training_data.append(concatted.tolist()) print "Created long training data" new_test_data = [] for row, longest_metric_length in enumerate(max_dim_test): concatted = np.array([]) for metric in _METRICS: filler = np.zeros(longest_metric_length) actual_length = test_data['actual_lengths'][metric][row] filler[0:actual_length] = training_data[metric][row][0:actual_length] concatted = np.concatenate((concatted,filler), axis=0) 
new_test_data.append(concatted.tolist()) print "Created long test data" phi_training_data = [np.arccos(row) for row in new_training_data] phi_test_data = [np.arccos(row) for row in new_test_data] print "Phi'd the data" model = inception.Inception() print "Loaded the model" test_vectors = [] for row in phi_test_data: temp_sin = np.sin(row) temp_sin = temp_sin.reshape((len(temp_sin),1)) temp_cos = np.cos(row) temp_cos = temp_cos.reshape((len(temp_cos),1)) image = np.zeros((1,len(temp_sin),len(temp_sin),3)) image[0,:,:,0] = _interpolation(np.dot(temp_sin,temp_cos.T) - np.dot(temp_cos,temp_sin.T)) image+=1 image*=127.5 output_vector = transfer_values_cache(images=image, model=model) test_vectors.append(output_vector) print "Done with test cnn" phi_test_data = None # Saves the objects relevant to the DNN next step for test _save_obj(test_vectors, REALM + '^cnn_test_output_concat') print "Saved test cnn" # All the same stuff as above, but for training data train_vectors = [] for row in phi_training_data: temp_sin = np.sin(row) temp_sin = temp_sin.reshape((len(temp_sin),1)) temp_cos = np.cos(row) temp_cos = temp_cos.reshape((len(temp_cos),1)) image = np.zeros((1,len(temp_sin),len(temp_sin),3)) image[0,:,:,0] = _interpolation(np.dot(temp_sin,temp_cos.T) - np.dot(temp_cos,temp_sin.T)) image+=1 image*=127.5 output_vector = transfer_values_cache(images=image, model=model) train_vectors.append(output_vector) print "Done with train cnn" phi_training_data = None _save_obj(train_vectors, REALM + '^cnn_training_output_concat') print "Saved train cnn"
import inception
import config
import prettytensor as pt

# Number of target classes for the new classifier head.
num_classes = 7

model = inception.Inception()

### CALCULATE TRANSFER-VALUES ###
from inception import transfer_values_cache

file_path_cache_test = os.path.join(config.TRANSFER_VALUES_DIR,
                                    'inception_kaggle_test.pkl')
# NOTE(review): images=None means the transfer-values must already exist in
# the cache file — confirm the cache is generated beforehand.
transfer_values_test = transfer_values_cache(cache_path=file_path_cache_test,
                                             images=None,
                                             model=model)

### NEW CLASSIFIER IN TENSORFLOW ###
# Placeholder Variables
transfer_len = model.transfer_len
x = tf.placeholder(tf.float32, shape=[None, transfer_len], name='x')
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

# Neural Network
x_pretty = pt.wrap(x)  # Wrap the transfer-values as a Pretty Tensor object.
with pt.defaults_scope(activation_fn=tf.nn.relu):
    y_pred, loss = x_pretty.fully_connected(
        size=1024,
    # Tail of a helper whose definition starts outside this chunk.
    return np.asarray(images)


# Load the Inception model.
model = inception.Inception()

# Cache files for the transfer-values.
file_path_cache_train = os.path.join(data_dir, 'inception-knifey-train.pkl')
file_path_cache_test = os.path.join(data_dir, 'inception-knifey-test.pkl')

print("Processing Inception transfer-values for training-images ...")
# If transfer-values have already been calculated then reload them,
# otherwise calculate them and save them to a cache-file.
transfer_values_train = inception.transfer_values_cache(
    cache_path=file_path_cache_train,
    image_paths=image_paths_train,
    model=model)

print("Processing Inception transfer-values for test-images ...")
# If transfer-values have already been calculated then reload them,
# otherwise calculate them and save them to a cache-file.
transfer_values_test = inception.transfer_values_cache(
    cache_path=file_path_cache_test,
    image_paths=image_paths_test,
    model=model)

# Keras classifier head on top of the Inception transfer-values.
transfer_len = model.transfer_len
inputs = Input(shape=(int(transfer_len), ))
net = Dense(units=1024, activation='relu')(inputs)
net = Dense(units=4, activation='softmax')(net)
def main(splitnum, finalimgpath):
    """Train a transfer-learning classifier on tumor data for the given
    split number and predict the class of the single image at finalimgpath.

    Returns (acc, cls_pred): the final test-set accuracy and the predicted
    class for the single input image.
    """
    # print stuff
    print("=========================================")
    print(splitnum)

    train_batch_size = 64

    def random_batch():
        # Draw a random training batch of transfer-values and labels.
        # Number of images (transfer-values) in the training-set.
        num_images = len(transfer_values_train)

        # Create a random index.
        idx = np.random.choice(num_images,
                               size=train_batch_size,
                               replace=False)

        # Use the random index to select random x and y-values.
        # We use the transfer-values instead of images as x-values.
        x_batch = transfer_values_train[idx]
        y_batch = labels_train[idx]

        return x_batch, y_batch

    def optimize(num_iterations):
        # Run num_iterations optimization steps, checkpointing every 100
        # global steps and tracking test accuracy at each checkpoint.
        # Start-time used for printing time-usage below.
        start_time = time.time()

        # Else, save for the first time
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=100)
        session_list = []
        test_acc = []

        for i in range(num_iterations):
            # Get a batch of training examples.
            # x_batch now holds a batch of images (transfer-values) and
            # y_true_batch are the true labels for those images.
            x_batch, y_true_batch = random_batch()

            # Put the batch into a dict with the proper names
            # for placeholder variables in the TensorFlow graph.
            feed_dict_train = {x: x_batch, y_true: y_true_batch}

            # Run the optimizer using this batch of training data.
            # TensorFlow assigns the variables in feed_dict_train
            # to the placeholder variables and then runs the optimizer.
            # We also want to retrieve the global_step counter.
            i_global, _ = session.run([global_step, optimizer],
                                      feed_dict=feed_dict_train)

            # Print status to screen every 100 iterations (and last).
            if (i_global % 100 == 0) or (i == num_iterations - 1):
                savepath = saver.save(session,
                                      'checkpoints\\split1\\model',
                                      global_step=i_global)
                session_list.append(savepath)

                # Calculate the accuracy on the training-batch.
                batch_acc = session.run(accuracy, feed_dict=feed_dict_train)

                # Test accuracy with session
                correct, cls_pred = predict_cls_test()
                acc, num_correct = classification_accuracy(correct)

                # Print status.
                # msg = "Global Step: {0:>6}, Training Batch Accuracy: {1:>6.1%}"
                # print(msg.format(i_global, batch_acc))
                # print("Testing Accuracy: ", round(acc*100, 2))

                # save test accuracy
                test_acc.append(round(acc * 100, 2))
                # print("========================================================")

        # Ending time.
        end_time = time.time()

        # Difference between start and end-times.
        time_dif = end_time - start_time

        # Print the time-usage.
        print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
        max_acc = max(test_acc)
        print("MAX: ", max_acc)
        # print(list(test_acc.values()).index(max_acc))
        print(test_acc.index(max_acc))
        # print(mydict.values().index(max(test_acc.values())))
        # pth = session_list[test_acc.index(max_acc)]
        # NOTE(review): `pth` is never assigned (the assignment above is
        # commented out), so this restore raises NameError — confirm intent.
        saver.restore(session, pth)
        return

    def plot_example_errors(cls_pred, correct):
        # This function is called from print_test_accuracy() below.
        # cls_pred is an array of the predicted class-number for
        # all images in the test-set.
        # correct is a boolean array whether the predicted class
        # is equal to the true class for each image in the test-set.

        # Negate the boolean array.
        incorrect = (correct == False)

        # Get the images from the test-set that have been
        # incorrectly classified.
        images = images_test[incorrect]

        # Get the predicted classes for those images.
        cls_pred = cls_pred[incorrect]

        # Get the true classes for those images.
        cls_true = cls_test[incorrect]

        n = min(9, len(images))

        # Plot the first n images.
        plot_images(images=images[0:n],
                    cls_true=cls_true[0:n],
                    cls_pred=cls_pred[0:n])

    def plot_confusion_matrix(cls_pred):
        # This is called from print_test_accuracy() below.
        # cls_pred is an array of the predicted class-number for
        # all images in the test-set.

        # Get the confusion matrix using sklearn.
        cm = confusion_matrix(
            y_true=cls_test,  # True class for test-set.
            y_pred=cls_pred)  # Predicted class.

        # Print the confusion matrix as text.
        for i in range(num_classes):
            # Append the class-name to each line.
            class_name = "({}) {}".format(i, class_names[i])
            print(cm[i, :], class_name)

        # Print the class-numbers for easy reference.
        class_numbers = [" ({0})".format(i) for i in range(num_classes)]
        print("".join(class_numbers))

    # Split the data-set in batches of this size to limit RAM usage.
    batch_size = 256

    def predict_cls(transfer_values, labels, cls_true):
        # Predict classes for transfer_values in batches; returns
        # (correct, cls_pred).
        # Number of images.
        num_images = len(transfer_values)

        # Allocate an array for the predicted classes which
        # will be calculated in batches and filled into this array.
        cls_pred = np.zeros(shape=num_images, dtype=np.int)

        # Now calculate the predicted classes for the batches.
        # We will just iterate through all the batches.
        # The starting index for the next batch is denoted i.
        i = 0

        while i < num_images:
            # The ending index for the next batch is denoted j.
            j = min(i + batch_size, num_images)

            # Create a feed-dict with the images and labels
            # between index i and j.
            feed_dict = {x: transfer_values[i:j], y_true: labels[i:j]}

            # Calculate the predicted class using TensorFlow.
            cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)

            # Set the start-index for the next batch to the
            # end-index of the current batch.
            i = j

        # Create a boolean array whether each image is correctly classified.
        correct = (cls_true == cls_pred)

        return correct, cls_pred

    def predict_one_image(imgarr):
        # Predict the class of a single image's transfer-values.
        # Number of images.
        num_images = 1
        label = np.zeros(shape=[0, 2], dtype=np.int)

        # Allocate an array for the predicted classes which
        # will be calculated in batches and filled into this array.
        cls_pred = np.zeros(shape=num_images, dtype=np.int)
        feed_dict = {x: imgarr, y_true: label}
        cls_pred = session.run(y_pred_cls, feed_dict=feed_dict)
        return cls_pred

    def predict_cls_test():
        # Predict classes for the whole test set.
        return predict_cls(transfer_values=transfer_values_test,
                           labels=labels_test,
                           cls_true=cls_test)

    def classification_accuracy(correct):
        # When averaging a boolean array, False means 0 and True means 1.
        # So we are calculating: number of True / len(correct) which is
        # the same as the classification accuracy.

        # Return the classification accuracy
        # and the number of correct classifications.
        return correct.mean(), correct.sum()

    def print_test_accuracy(show_example_errors=False,
                            show_confusion_matrix=False):
        # For all the images in the test-set,
        # calculate the predicted classes and whether they are correct.
        correct, cls_pred = predict_cls_test()

        # Classification accuracy and the number of correct classifications.
        acc, num_correct = classification_accuracy(correct)

        # Number of images being classified.
        num_images = len(correct)

        # Print the accuracy.
        msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
        print(msg.format(acc, num_correct, num_images))

        # Plot some examples of mis-classifications, if desired.
        if show_example_errors:
            print("Example errors:")
            plot_example_errors(cls_pred=cls_pred, correct=correct)

        # Plot the confusion matrix, if desired.
        if show_confusion_matrix:
            print("Confusion Matrix:")
            plot_confusion_matrix(cls_pred=cls_pred)

    # =====================================================================
    # THIS IS WHERE EVERYTHING COMES TOGETHER
    # =====================================================================
    test, train, val = transfer.preprocessing(
        "D:\\AI Stuff\\aiproject-inception-master\\breakhissplits_v2\\train_val_test_60_12_28\\"
    )
    transfer.split(test, train, val, str(splitnum))
    tumordata.add_data_path(splitnum)
    tumordata.start()
    print(tumordata.data_path)
    class_names = tumordata.load_class_names()
    images_train, cls_train, labels_train = tumordata.load_training_data()
    images_test, cls_test, labels_test = tumordata.load_testing_data()
    print("Size of:")
    print("- Training-set:\t\t{}".format(len(images_train)))
    print("- Test-set:\t\t{}".format(len(images_test)))

    # Image to predict on
    img = Image.open(finalimgpath)
    imgarr = []
    imgarr.append(np.array(img))

    # inception dir
    inception.data_dir = 'inception/'

    # download the model
    inception.maybe_download()

    # load model
    model = inception.Inception()

    # caches for training and test sets
    file_path_cache_train = os.path.join(tumordata.data_path,
                                         'inception_tumordata_train.pkl')
    file_path_cache_test = os.path.join(tumordata.data_path,
                                        'inception_tumordata_test.pkl')
    file_path_cache_single_test = os.path.join(
        tumordata.data_path, 'inception_tumordata_single_test.pkl')

    print("Processing Inception transfer-values for training-images ...")

    # If transfer-values have already been calculated then reload them,
    # otherwise calculate them and save them to a cache-file.
    transfer_values_train = transfer_values_cache(
        cache_path=file_path_cache_train, images=images_train, model=model)

    print("Processing Inception transfer-values for test-images ...")

    # If transfer-values have already been calculated then reload them,
    # otherwise calculate them and save them to a cache-file.
    transfer_values_test = transfer_values_cache(
        cache_path=file_path_cache_test, images=images_test, model=model)

    # Transfer-values for the single prediction image.
    transfer_values_single_test = transfer_values_cache(
        cache_path=file_path_cache_single_test, images=imgarr, model=model)

    # print("TRANSFER VALUES TEST: ", transfer_values_test)

    # Build the classifier graph on top of the transfer-values.
    transfer_len = model.transfer_len
    x = tf.placeholder(tf.float32, shape=[None, transfer_len], name='x')
    y_true = tf.placeholder(tf.float32,
                            shape=[None, num_classes],
                            name='y_true')
    y_true_cls = tf.argmax(y_true, dimension=1)

    # x_one = tf.placeholder(tf.float32, shape=[len(imgarr), len(imgarr[0]), 3], name='x_one')
    # y_true_one = tf.placeholder(tf.float32, shape=1, name='y_true_one')
    # y_true_cls_one = tf.argmax(y_true_one, dimension=1)

    # Wrap the transfer-values as a Pretty Tensor object.
    x_pretty = pt.wrap(x)

    with pt.defaults_scope(activation_fn=tf.nn.relu):
        y_pred, loss = x_pretty.\
            fully_connected(size=1024, name='layer_fc1').\
            softmax_classifier(num_classes=num_classes, labels=y_true)

    global_step = tf.Variable(initial_value=0,
                              name='global_step',
                              trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        loss, global_step)
    y_pred_cls = tf.argmax(y_pred, dimension=1)
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    session = tf.Session()
    session.run(tf.global_variables_initializer())

    # Accuracy before and after training.
    print_test_accuracy(show_example_errors=False,
                        show_confusion_matrix=False)
    optimize(1000)
    print_test_accuracy(show_example_errors=False,
                        show_confusion_matrix=False)

    correct, cls_pred = predict_cls_test()
    acc, num_correct = classification_accuracy(correct)
    # print("acc, num correct: ", acc, num_correct)

    cls_pred = predict_one_image(transfer_values_single_test)
    # print("PREDICTION")
    # print(cls_pred)
    print(">>>>>>>>>>>><<<<<<<<<<<<<<<<")
    # prediction = model.classify(finalimgpath)

    return acc, cls_pred
# print("X test : original : "+str(train_image_matrix_tmp.shape)) # print("Y original : "+str(test_image_matrix_tmp.shape)) # train_image_matrix = train_image_matrix_tmp # test_image_matrix = test_image_matrix_tmp # train_image_matrix_tmp,test_image_matrix_temp = sess.run([tf_train_images,tf_test_images]) # ------------------------------------------------------------------------------- # print(tf_train_images) # print("Rank : "+str(tf.rank(tf_train_images))) print("_________________________________________________________________") transfer_values_train = transfer_values_cache(cache_path=file_path_cache_train, images=train_image_matrix, model=model) # test images transfer_values_test = transfer_values_cache(cache_path=file_path_cache_test, images=test_image_matrix, model=model) # test-point : # transfer_values_train = transfer_values_train[0:300] # transfer_values_test = transfer_values_test[0:300] print("Completed matrix to transfer values ") # ________________________________________________________________________________________ print("Completed transfer values cals for TRAIN and test ") # print(transfer_values_train_op)
# NOTE(review): script fragment that prepares the knifey prediction set and its
# Inception transfer-values, then starts restoring a trained classifier from a
# checkpoint. The `new_prediction` function is CUT OFF at the end of this chunk.
from knifey import num_classes

data_dir = knifey.data_dir
dataset = knifey.load()
model = inception.Inception()

from inception import transfer_values_cache

# Images to predict on; transfer-values are cached at cnn-pred.pkl and only
# recomputed if the cache file is missing.
image_paths_pred, cls_pred, labels_pred = dataset.get_pred_set()
file_path_cache_pred = os.path.join(data_dir, 'cnn-pred.pkl')
transfer_values_pred = transfer_values_cache(cache_path=file_path_cache_pred,
                                             image_paths=image_paths_pred,
                                             model=model)

# Human-readable class names; NOTE(review): these are sport categories, not the
# knifey/spoony classes — presumably intentional for this experiment, verify.
literal_labels = [
    "skiing", "hurdling", "bmx", "rowing", "baseball", "polevault",
    "hammerthrow", "tennis", "soccer", "golf"
]

with tf.Session() as session:

    def new_prediction():
        # Restore the trained transfer-classifier graph and weights from the
        # latest checkpoint under checkpoint/transfer.
        saver = tf.train.import_meta_graph(
            'checkpoint/transfer/inception_cnn.meta')
        saver.restore(session,
                      tf.train.latest_checkpoint('checkpoint/transfer'))
        # Function body continues past the end of this chunk.
        graph = tf.get_default_graph()
# NOTE(review): this chunk starts MID-STATEMENT — the assignment target of the
# first line (presumably `face_1_list[i, 0:face_1.shape[0], ...]`, mirroring
# the face_2 line below) and the loop header that bound `i` and `row` were lost
# in extraction. The loop copies each face crop into a padded array and records
# the pair's relation traits.
0:face_1.shape[1]] = face_1[0:face_1.shape[0], 0:face_1.shape[1]]
face_2_list[i, 0:face_2.shape[0], 0:face_2.shape[1]] = face_2[0:face_2.shape[0], 0:face_2.shape[1]]
relation_traits_list[i] = get_relation_traits(row)
file.close()

if not os.path.exists(output_path):
    os.makedirs(output_path)

from inception import transfer_values_cache

# Compute (or reload from cache) Inception transfer-values for both face crops
# of every pair.
transfer_values_face_1 = transfer_values_cache(cache_path=output_path + 'inception_face_1_train.pkl',
                                               images=face_1_list,
                                               model=model)
transfer_values_face_2 = transfer_values_cache(cache_path=output_path + 'inception_face_2_train.pkl',
                                               images=face_2_list,
                                               model=model)

# Concatenate the spatial cue with the softmax outputs of three attribute heads
# (aflw / kaggle / celeb) applied to each face's transfer-values, one feature
# row per pair (axis=1).
conc = np.concatenate(
    (spatial_cue_list, get_softmax_layer(transfer_values_face_1, aflw),
     get_softmax_layer(transfer_values_face_1, kaggle),
     get_softmax_layer(transfer_values_face_1, celeb),
     get_softmax_layer(transfer_values_face_2, aflw),
     get_softmax_layer(transfer_values_face_2, kaggle),
     get_softmax_layer(transfer_values_face_2, celeb)),
    axis=1)
def _extract_transfer_features(is_train_set, names, images, inceptionModel):
    # Compute (or reload from the per-image cache file named by getFileName)
    # the Inception transfer-values for every image in the batch and stack
    # them into a single 2-D feature matrix, one row per image.
    rows = []
    for i in range(images.shape[0]):
        rows.append(
            np.array(
                transfer_values_cache(cache_path=getFileName(
                    is_train_set, names[i]),
                                      images=np.expand_dims(images[i], axis=0),
                                      model=inceptionModel)))
    # One vstack at the end instead of one per image — the original re-stacked
    # the growing matrix on every iteration, which is quadratic in batch size.
    return np.vstack(rows)


def _append_log(line):
    # Append one line to the shared log file; `with` guarantees the handle is
    # closed even if the write raises (the original leaked it on exception).
    with open(logFile, "a") as fileHandler:
        fileHandler.write(line)


def train(num_iteration, inceptionModel, trainProcessedYielder,
          valProcessedYielder, valYielder, typeRun):
    """Train the transfer-learning classifier for `num_iteration` epochs.

    For each epoch: log validation accuracy, then iterate over all training
    batches, extracting cached Inception transfer-values for both a training
    and a validation batch, running one optimizer step, and logging per-batch
    train/val accuracy. A checkpoint is saved after all epochs complete.

    Relies on module-level globals defined elsewhere in this file: `session`,
    `optimizer`, `cross_entropy`, `accuracy`, `processedImages`,
    `inceptionFeatureVec`, `y_true`, `saver`, `logFile`, `typeOfFile`.

    Args:
        num_iteration: number of epochs to run.
        inceptionModel: loaded Inception model used for transfer-values.
        trainProcessedYielder: yields (raw batch, processed batch, labels,
            number of batches, image names) for training data.
        valProcessedYielder: same shape for validation data.
        valYielder: validation generator used only for accuracy reporting.
        typeRun: tag embedded in the checkpoint file name.
    """
    total_iterations = 0
    for iteration in range(num_iteration):
        # Epoch-start validation accuracy.
        currentResult = predictValidation(inceptionModel, valYielder)
        _append_log("======= Accuracy: ========== {0}\n".format(currentResult))
        print("======= Accuracy: %f ==========" % currentResult)

        batchCount = 0
        while True:
            trainBatchX, trainXProcessed, trainBatchY, numberOfBatches, namesTrain = next(
                trainProcessedYielder)
            y_true_batch = toOneHotEncodingVectorForm(trainBatchY.flatten(),
                                                      0, 19)
            # NOTE: one validation batch is consumed per TRAINING batch, as in
            # the original implementation.
            valX, valXProcessed, valY, _, namesVal = next(valProcessedYielder)
            y_valid_batch = toOneHotEncodingVectorForm(valY.flatten(), 0, 19)

            # Per-image transfer-values for both batches (cache-backed); the
            # duplicated train/val loops are now one shared helper.
            trainFeatures = _extract_transfer_features(True, namesTrain,
                                                       trainBatchX,
                                                       inceptionModel)
            valFeatures = _extract_transfer_features(False, namesVal, valX,
                                                     inceptionModel)

            feed_dict_tr = {
                processedImages: trainXProcessed,
                inceptionFeatureVec: trainFeatures,
                y_true: y_true_batch
            }
            feed_dict_val = {
                processedImages: valXProcessed,
                inceptionFeatureVec: valFeatures,
                y_true: y_valid_batch
            }

            # One optimization step, then accuracy (as percentages) on both
            # the just-trained batch and the validation batch.
            session.run([optimizer, cross_entropy], feed_dict=feed_dict_tr)
            trainAcc = session.run(accuracy, feed_dict=feed_dict_tr) * 100
            valAcc = session.run(accuracy, feed_dict=feed_dict_val) * 100

            _append_log("[{0}-{1}]: Train:{2}, Val:{3}\n".format(
                batchCount, iteration, trainAcc, valAcc))
            print("[%d-%d]: Train:%f, Val:%f" %
                  (batchCount, iteration, trainAcc, valAcc))

            batchCount += 1
            if batchCount == numberOfBatches:
                break

    # Timestamped checkpoint after the final epoch (matches original: saved
    # once, outside the epoch loop).
    saver.save(
        session, "./checkpoints/iter_" + typeRun + str(num_iteration) +
        typeOfFile + time.strftime("%Y%m%d-%H%M%S") + ".ckpt")
    total_iterations += num_iteration

    # Final validation accuracy (over the processed-validation generator).
    currentResult = predictValidation(inceptionModel, valProcessedYielder)
    _append_log("======= Accuracy: ========== {0}\n".format(currentResult))
    print("======= Accuracy: %f ==========" % currentResult)
def main(splitnum):
    """End-to-end transfer-learning run for one BreakHis data split.

    Preprocesses and splits the dataset, computes (cache-backed) Inception
    transfer-values for train/test images, builds a small softmax classifier
    on top of them with Pretty Tensor, trains it for 5000 iterations, and
    prints test accuracy before and after training.
    """
    # Preprocess the raw dataset and materialize split `splitnum`.
    # NOTE(review): hard-coded absolute path — only works on the author's box.
    test, train, val = transfer.preprocessing(
        "/home/runefeather/Desktop/Classwork/AI/Project/breakhissplits_v2/train_val_test_60_12_28/"
    )
    transfer.split(test, train, val, str(splitnum))
    tumordata.add_data_path(splitnum)
    tumordata.start()
    print(tumordata.data_path)
    class_names = tumordata.load_class_names()
    images_train, cls_train, labels_train = tumordata.load_training_data()
    images_test, cls_test, labels_test = tumordata.load_testing_data()
    print("Size of:")
    print("- Training-set:\t\t{}".format(len(images_train)))
    print("- Test-set:\t\t{}".format(len(images_test)))
    # inception dir
    inception.data_dir = 'inception/'
    # download the model (no-op if already present)
    inception.maybe_download()
    # load model
    model = inception.Inception()
    # caches for training and test sets
    file_path_cache_train = os.path.join(tumordata.data_path,
                                         'inception_tumordata_train.pkl')
    file_path_cache_test = os.path.join(tumordata.data_path,
                                        'inception_tumordata_test.pkl')
    print("Processing Inception transfer-values for training-images ...")
    # If transfer-values have already been calculated then reload them,
    # otherwise calculate them and save them to a cache-file.
    transfer_values_train = transfer_values_cache(
        cache_path=file_path_cache_train, images=images_train, model=model)
    print("Processing Inception transfer-values for test-images ...")
    # If transfer-values have already been calculated then reload them,
    # otherwise calculate them and save them to a cache-file.
    transfer_values_test = transfer_values_cache(
        cache_path=file_path_cache_test, images=images_test, model=model)
    # Classifier graph: transfer-values in, one FC layer + softmax out.
    transfer_len = model.transfer_len
    x = tf.placeholder(tf.float32, shape=[None, transfer_len], name='x')
    y_true = tf.placeholder(tf.float32,
                            shape=[None, num_classes],
                            name='y_true')
    # `dimension=` is the deprecated TF1 spelling of `axis=`.
    y_true_cls = tf.argmax(y_true, dimension=1)
    # Wrap the transfer-values as a Pretty Tensor object.
    x_pretty = pt.wrap(x)
    with pt.defaults_scope(activation_fn=tf.nn.relu):
        y_pred, loss = x_pretty.\
            fully_connected(size=1024, name='layer_fc1').\
            softmax_classifier(num_classes=num_classes, labels=y_true)
    # Step counter incremented by the optimizer; not trainable itself.
    global_step = tf.Variable(initial_value=0,
                              name='global_step',
                              trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(
        loss, global_step)
    y_pred_cls = tf.argmax(y_pred, dimension=1)
    correct_prediction = tf.equal(y_pred_cls, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    # Accuracy before training, 5000 optimizer iterations, accuracy after.
    # NOTE(review): `optimize` returns the session it used; re-binding here
    # implies it may hand back a different/updated session — confirm.
    print_test_accuracy(session, x, y_true, y_pred_cls, transfer_values_test,
                        labels_test, cls_test)
    session = optimize(5000, session, x, y_true, global_step, optimizer,
                       accuracy, transfer_values_train, labels_train)
    print_test_accuracy(session, x, y_true, y_pred_cls, transfer_values_test,
                        labels_test, cls_test)
    # Release the Inception model and the TF session.
    model.close()
    session.close()
def main():
    """Convert per-job metric time series from JSON into CNN feature vectors.

    Python 2 code (`print` statements, list-returning `map`). For each input
    JSON file: load the per-application metric time series, pad them into
    fixed-width matrices, normalize to [-1, 1], render each application's
    metrics as an image, push it through Inception, and pickle the resulting
    CNN vectors together with the application ids.
    """
    # Name of file to load data from
    filenames = [
        './data/newjsons/rubicon-fmap_1505242800000_7_days_app_data.json'
    ]
    # Can also point to a directory and look at all files ending in a certain file extension
    # filenames = _compile_filepath_list('.json')
    for filename in filenames:
        print "Opened and reading {0}...".format(filename)
        _FILE_NAME = filename
        # Realm name = file-name prefix before the first underscore.
        REALM = _FILE_NAME.split('_')[0].split('/')[-1]
        # List of all metrics collected for each job
        _METRICS = [
            'vmsram', 'tasks', 't_rscthnetno', 't_rscthhfsrb', 'c_ucpupct'
        ]
        # Open data file and load data
        with open(_FILE_NAME) as infile:
            data_set = json.load(infile)
        # ************************************* Constitute the label vector ************************************************
        app_ids = []
        # Iterate through the data set and identify unique labels by the first _NUM_CHAR characters in the name of the jobs
        for app_id in data_set:
            name = data_set[app_id]['job_name']
            lengths = []
            for metric in _METRICS:
                try:
                    lengths.append(len(data_set[app_id][metric]))
                except:
                    # NOTE(review): bare except deliberately skips metrics
                    # missing for this app; presumably KeyError — verify.
                    continue
            # NOTE(review): max() raises ValueError if ALL metrics are missing
            # for an app — assumed never to happen in the input data.
            length = max(lengths)
            app_ids.append((app_id, name, length))
        # ******************************** Create Arrays of zeros for each metric's data ***********************************
        # Time-series sampling interval used to convert timestamps to indices.
        _INCREMENT = 10
        print "Creating array of zeros..."
        # Max length of time series for each metric
        row_max_length = {
            'vmsram': 0,
            'tasks': 0,
            't_rscthnetno': 0,
            't_rscthhfsrb': 0,
            'c_ucpupct': 0
        }
        # Calculate the aforementioned max length
        for data in data_set:
            for metric in _METRICS:
                try:
                    # Length = (last timestamp - first timestamp) / increment + 1;
                    # each sample is assumed to be a [timestamp, value] pair.
                    row_max_length[metric] = \
                        np.max([row_max_length[metric],
                                int((data_set[data][metric][-1][0] -
                                     data_set[data][metric][0][0]) / _INCREMENT) + 1])
                except:
                    continue
        data_matrices = {}
        # Store arrays of zeros with max length calculated above for each metric
        for metric in _METRICS:
            data_matrices[metric] = np.zeros(shape=(len(app_ids),
                                                    row_max_length[metric]))
        # **************************************** Insert actual data into data_matrices ***********************************
        # The actual length of each time series for each metric
        print "Filling the arrays with actual data..."
        actual_row_lengths = {
            'vmsram': [],
            'tasks': [],
            't_rscthnetno': [],
            't_rscthhfsrb': [],
            'c_ucpupct': []
        }
        # Iterate through the data and insert the actual data into the arrays of zeros to have arrays padded with zeros
        # where they contain no data
        for counter, data in enumerate(data_set):
            for metric in _METRICS:
                try:
                    # Replace null samples with [0, NaN] so the value column
                    # stays aligned; Python 2 map returns a list here.
                    data_for_metric = np.array(
                        map(lambda x: [0, np.nan] if x == None else x,
                            data_set[data][metric]))
                    length_of_ts = len(data_for_metric)
                    actual_row_lengths[metric].append(length_of_ts)
                    # Keep only the value column; rows stay zero-padded beyond
                    # the actual series length.
                    data_matrices[metric][counter][:length_of_ts] = [
                        a[1] for a in data_for_metric
                    ]
                except:
                    # Metric absent for this app: record zero length.
                    actual_row_lengths[metric].append(0)
        # maximums = {}
        # for metric in _METRICS:
        #     maximums[metric] = np.nanmax(data_matrices[metric])
        # print maximums
        # for row_num in range(len(app_ids)):
        #     final_label = []
        #     for metric in _METRICS:
        #         l = create_label_vector(data_matrices[metric][row_num][0:actual_row_lengths[metric][row_num]], maximums[metric])
        #         final_label.append(l)
        #     print final_label
        #     app_ids[row_num] = (app_ids[row_num][0], app_ids[row_num][1], final_label)
        # ****************************************** Normalize the Data ***************************************************
        print \
            "Starting normalization..."
        # Scale the data down to a [-1,1] interval
        for metric in _METRICS:
            maximum = np.nanmax(data_matrices[metric])
            minimum = np.nanmin(data_matrices[metric])
            for row_number, row in enumerate(data_matrices[metric]):
                if maximum > minimum:
                    # Affine map [min, max] -> [-1, 1], applied in place.
                    row *= 2
                    row -= (maximum + minimum)
                    row /= (maximum - minimum)
                    # Clamp any numerical overshoot within the real series.
                    for counter in range(
                            actual_row_lengths[metric][row_number]):
                        if row[counter] > 1:
                            row[counter] = 1
                        if row[counter] < -1:
                            row[counter] = -1
                data_matrices[metric][row_number] = row
            print metric, " is normalized!"
        data_matrices['actual_lengths'] = actual_row_lengths
        model = inception.Inception()
        # ********************************** Image composition, Training, and Testing **************************************
        _NUM_DATA = data_matrices[_METRICS[0]].shape[0]
        phi_data = _create_phi_data(data_matrices)
        # Matrices where element i contains the max length of the ith applications multiple metric time series
        max_dim = _compose_dimension_matrices(phi_data)
        # List of all CNN outputs to every image in the test dataset
        cnn_vectors = []
        # Populate that list by composing temporary dictionaries and passing them through create_image and running it through the CNN
        for row in range(_NUM_DATA):
            temp_dict = {'actual_lengths': {}}
            for metric in _METRICS:
                temp_dict[metric] = phi_data[metric][row]
                temp_dict['actual_lengths'][metric] = phi_data[
                    'actual_lengths'][metric][row]
            input_dict = [temp_dict]
            largest_dim = max_dim[row]
            image = _create_image(input_dict, largest_dim)
            # No cache_path: transfer-values are recomputed on every run.
            output_vector = transfer_values_cache(images=image, model=model)
            cnn_vectors.append(output_vector)
        # Save the output of the CNN to cnn_vectors and the application ids with their corresponding names in numerical order to app_ids
        # Meaning the first vector in cnn_vectors corresponds to the first app_id with its name
        _save_obj(cnn_vectors, REALM + '^cnn_vectors')
        _save_obj(app_ids, REALM + '^app_ids')
#Todo kendi datamızı çekeriz train_img, train_cls, train_labels = cifar10.load_training_data() test_img, test_cls, test_labels = cifar10.load_test_data() #Todo inceptionı bağlıyacağımız data nun dosya yolu girilir file_path_cache_train = os.path.join(cifar10.data_path, 'inception_cifar10_train.pkl') file_path_cache_test = os.path.join(cifar10.data_path, 'inception_cifar10_test.pkl') #Inceptionda resimler 0 255 arası bizim data ise 0 1 arası datayı inceptiona uygun hale getiriyorum images_scaled = train_img * 255.0 #Todo transfer values cache ile kendi datamızı inceptiondan geçiriyor ve transfer verileri elde ediyoruz transfer_values_train = transfer_values_cache(cache_path=file_path_cache_train, images=images_scaled, model=model) images_scaled = test_img * 255.0 transfer_values_test = transfer_values_cache(cache_path=file_path_cache_test, images=images_scaled, model=model) print(transfer_values_train.shape) #Todo inceptiondan çıkan sonuçları kendi oluştrduğumuz full connected layerden geçirme işlemi #bundan sonrası önceki uygulamalar ile aynı. x = tf.placeholder(tf.float32, [None, 2048]) y_true = tf.placeholder(tf.float32, [None, num_classes]) weight1 = tf.Variable(tf.truncated_normal([2048, 1024], stddev=0.1))
# Bring your packages onto the path import sys, os sys.path.append(os.path.abspath(os.environ['CIFAR_PATH'])) sys.path.append(os.path.abspath(os.environ['INCEPTION_PATH'])) import cifar10, inception from inception import transfer_values_cache # from cifar10 import num_classes # class_names = cifar10.load_class_names() # print cifar10.num_classes, class_names images_train, cls_train, labels_train = cifar10.load_training_data() model = inception.Inception() train_cache = os.path.join(cifar10.data_path, 'train_cache/') test_cache = os.path.join(cifar10.data_path, 'test_cache/') images_scaled = images_train * 255.0 transfer_values_train = transfer_values_cache(train_cache, images=images_scaled, model=model)