def grid_search(pipeline, train_path, test_path):
    parameters = {
        'clf__C': (1, 10, 20),
        'fs__k': (20000, 500000, 100000, 300000)
    }
    X_train, y_train = load_dataset(train_path)
    X_test, y_test = load_dataset(test_path)
    target_names = list(set([i[0] for i in y_train]))
    print("%d documents (training set)" % len(X_train))
    print("%d documents (test set)" % len(X_test))
    print("%d categories" % len(target_names))
    print()
    gridsearch = GridSearchCV(pipeline, parameters, cv=5, n_jobs=-1, verbose=1)
    print("Performing grid search...")
    print("pipeline:", [name for name, _ in pipeline.steps])
    print("parameters:")
    print(parameters)
    t0 = time()
    gridsearch.fit(X_train, y_train)
    print("done in %0.3fs" % (time() - t0))
    print()
    params = []
    print("Best dev score: %0.3f" % gridsearch.best_score_)
    print("Best parameters set:")
    best_parameters = gridsearch.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
        params.append((param_name, best_parameters[param_name]))
    print("Best test score: %0.3f" % gridsearch.score(X_test, y_test))
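# A minimal, hypothetical way to call grid_search above. The parameter grid
# references step names 'fs' (with parameter k) and 'clf' (with parameter C),
# which suggests a feature-selection step such as SelectKBest and a linear
# classifier; the exact steps, the 'vect' stage, and the file paths below are
# illustrative assumptions, not the original repository's configuration.
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.svm import LinearSVC

example_pipeline = Pipeline([
    ('vect', TfidfVectorizer()),
    ('fs', SelectKBest(chi2)),
    ('clf', LinearSVC()),
])
grid_search(example_pipeline, "data/train.xlsx", "data/test.xlsx")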
def main():
    '''combine all the operations together'''
    # 1 define the paths
    FILE_DIRECTORY = "hdfs://master:9000/user/hadoop/fault_diagnosis/dataset/"
    TRAIN_PATH = FILE_DIRECTORY + "train_set.csv"
    VALIDATE_PATH = FILE_DIRECTORY + "validate_set.csv"
    train_set = load_dataset(TRAIN_PATH)
    validate_set = load_dataset(VALIDATE_PATH)
    # 2 define the model: PCA followed by a random forest in a Spark ML pipeline
    PCA_model = pca_opreator(2)
    rf_model = random_forest_opreator()
    pipeline = Pipeline(stages=[PCA_model, rf_model])
    model = pipeline.fit(train_set)
    # 3 apply the fitted pipeline to both sets
    train_dataframe = model.transform(train_set)
    validate_dataframe = model.transform(validate_set)
    validate_dataframe.show(10)
    # 4 evaluate
    evaluator = MulticlassClassificationEvaluator(labelCol="indexed_label",
                                                  predictionCol="prediction",
                                                  metricName="accuracy")
    accuracy_train = evaluator.evaluate(train_dataframe)
    accuracy_validate = evaluator.evaluate(validate_dataframe)
    print("accuracy on train set = %g" % accuracy_train)
    print("accuracy on validation set = %g" % accuracy_validate)
def checkIndex(index):
    if index == '1':
        myName = input('Please insert your name:\n')
        print("Preparing for catching a pos image...")
        CatchFace("Catch A Pos Face", 500, './faceData/posFaceData')
        print("Pos model caught!")
        print("Continue?")
        conFlag = input('Y?:\n')
        if conFlag.upper() == 'Y':
            print("Preparing for catching a neg image...")
            CatchFace("Catch A Neg Face", 500, './faceData/negFaceData')
            print("Neg model caught!")
            print("Image loading, please wait...")
            load_dataset("./faceData")
            print("Data loaded!")
            print("Model crafting, please wait...")
            faceTrainMain(myName)
            clearFolder('./faceData/posFaceData')
            clearFolder('./faceData/negFaceData')
            print("Model Crafted!")
            showMenu()
        else:
            showMenu()
    elif index == '2':
        try:
            recFace()
        finally:
            showMenu()
    elif index == '3':
        print('Thank you for using!')
        exit()
    else:
        showMenu()
def update_dataset(self):
    create_dataset()
    output = load_dataset()
    count_all_images = len(os.listdir(base_dir / "Test_Sliced_Images"))
    predictions = {}
    image_count = 0
    for image, label in load_dataset():
        image = np.expand_dims(image, axis=2) / 255.
        prediction = self.model.predict(np.expand_dims(image, axis=0))
        if label not in predictions:
            predictions[label] = {"prediction": prediction, "count": 1}
            continue
        predictions[label]["prediction"] += prediction
        predictions[label]["count"] += 1
        image_count += 1
        print(f"[+] prediction {round(image_count / count_all_images * 100, 2)}%")
    for i in predictions:
        predictions[i] = (predictions[i]["prediction"] / predictions[i]["count"]).tolist()
    with open(base_dir / "Saved_Model/outputs.json", "w") as log:
        log.write(json.dumps(predictions, ensure_ascii=False))
def _load_train_test_data_helper():
    if FLAGS.tvt_options == 'all':
        dataset = load_dataset(FLAGS.dataset, 'all', FLAGS.align_metric,
                               FLAGS.node_ordering)
        dataset.print_stats()
        # Node feature encoding must be done at the entire dataset level.
        print('Encoding node features')
        dataset, num_node_feat = encode_node_features(dataset=dataset)
        print('Splitting dataset into train test')
        dataset_train, dataset_test = dataset.tvt_split(
            [FLAGS.train_test_ratio], ['train', 'test'])
    elif FLAGS.tvt_options == 'train,test':
        dataset_test = load_dataset(FLAGS.dataset, 'test', FLAGS.align_metric,
                                    FLAGS.node_ordering)
        dataset_train = load_dataset(FLAGS.dataset, 'train', FLAGS.align_metric,
                                     FLAGS.node_ordering)
        dataset_train, num_node_feat_train = \
            encode_node_features(dataset=dataset_train)
        dataset_test, num_node_feat_test = \
            encode_node_features(dataset=dataset_test)
        if num_node_feat_train != num_node_feat_test:
            raise ValueError('num_node_feat_train != num_node_feat_test '
                             '{} != {}'.format(num_node_feat_train,
                                               num_node_feat_test))
        num_node_feat = num_node_feat_train
    else:
        print(FLAGS.tvt_options)
        raise NotImplementedError()
    dataset_train.print_stats()
    dataset_test.print_stats()
    train_data = OurModelData(dataset_train, num_node_feat)
    test_data = OurModelData(dataset_test, num_node_feat)
    return train_data, test_data
def train_svm():
    input_dir = 'data'
    train_dir = os.path.join(input_dir, 'processed_data', 'train')
    validation_dir = os.path.join(input_dir, 'processed_data', 'validation')
    trainX, trainy = load_dataset(train_dir)
    testX, testy = load_dataset(validation_dir)
    model = load_model(os.path.join('model', 'facenet_keras.h5'))
    trainX = get_embedded_data(model, trainX)
    testX = get_embedded_data(model, testX)
    trainX = normalize(trainX)
    testX = normalize(testX)
    label_encode = LabelEncoder()
    label_encode.fit(trainy)
    trainy = label_encode.transform(trainy)
    testy = label_encode.transform(testy)
    np.save(os.path.join('model', 'classes.npy'), label_encode.classes_)
    model = get_svm_model(trainX, trainy)
    filename = os.path.join('model', 'svm_model.sav')
    joblib.dump(model, filename)
    print("SVM model saved!")
    pred_train = model.predict(trainX)
    pred_test = model.predict(testX)
    score_train = accuracy_score(trainy, pred_train)
    score_test = accuracy_score(testy, pred_test)
    print("Accuracy\nTrain : ", score_train, "\n", "Test : ", score_test)
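# A minimal sketch (not from the original repo) of how the artifacts saved by
# train_svm() could be reused at inference time. get_embedded_data() and the
# FaceNet model path are assumed to be the same helpers and files used above;
# the function name predict_identity is hypothetical.
import os
import numpy as np
import joblib
from keras.models import load_model
from sklearn.preprocessing import normalize

def predict_identity(face_pixels):
    facenet = load_model(os.path.join('model', 'facenet_keras.h5'))
    svm = joblib.load(os.path.join('model', 'svm_model.sav'))
    classes = np.load(os.path.join('model', 'classes.npy'), allow_pickle=True)
    # embed a single face and L2-normalize it, mirroring the training flow
    embedding = get_embedded_data(facenet, np.expand_dims(face_pixels, axis=0))
    embedding = normalize(embedding)
    return classes[svm.predict(embedding)[0]]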
def main():
    X_train, y_train = load_dataset(
        '../data/disaster_response_messages_training.csv', 'weather_related')
    # NOTE: the validation split is loaded from the same training CSV here;
    # if a separate validation file exists it should be used instead.
    X_val, y_val = load_dataset(
        '../data/disaster_response_messages_training.csv', 'weather_related')
    X_test, y_test = load_dataset(
        '../data/disaster_response_messages_test.csv', 'weather_related')
    # get annotated labels
    cats_train = get_categories(y_train)
    cats_val = get_categories(y_val)
    cats_test = get_categories(y_test)
    train_cnn(X_train, cats_train, X_val, cats_val, X_test, cats_test, 10)
def test_load_dataset():
    """Test loading the traffic datasets."""
    from load_data import load_dataset
    import os
    zhanqian = load_dataset(os.getcwd() + '/../data/zhanqian.csv')
    huizhou = load_dataset(os.getcwd() + '/../data/zhonglou.csv')
    ningguo = load_dataset(os.getcwd() + '/../data/ningguo.csv')
    xiyou = load_dataset(os.getcwd() + '/../data/xiyou.csv')
    assert not zhanqian.empty
    assert not huizhou.empty
    assert not ningguo.empty
    assert not xiyou.empty
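# The assertions above only require that load_dataset return a non-empty pandas
# DataFrame. A minimal sketch consistent with that contract (the real
# load_data.load_dataset may also parse timestamps, set an index, etc.) could be:
import pandas as pd

def load_dataset(csv_path):
    """Read one traffic CSV into a DataFrame; an empty frame fails the test."""
    return pd.read_csv(csv_path)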
def _plot_pairs(pairs, dataset_name, num_pairs, fix_match_pos, dir, want,
                want_gid_tuples, need_eps, mode, pick_best):
    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')
    dataset.print_stats()
    natts, *_ = get_dataset_conf(dataset_name)
    node_feat_name = natts[0] if len(natts) >= 1 else None  # TODO: only one node feat
    if pairs is None:
        pairs = dataset.get_all_pairs()
    pairs, num_pairs = _filter_pairs(pairs, num_pairs, want_gid_tuples)
    assert num_pairs >= 1 and len(pairs) >= num_pairs, '{} {}'.format(
        num_pairs, len(pairs))
    all_pair_gid_tuples = sorted(pairs.keys())
    random.Random(123).shuffle(all_pair_gid_tuples)
    for i in range(num_pairs):
        gid1, gid2 = all_pair_gid_tuples[i]
        # if gid1 != 106:
        #     continue
        # else:
        #     pass
        g1 = dataset.look_up_graph_by_gid(gid1).get_nxgraph()
        g2 = dataset.look_up_graph_by_gid(gid2).get_nxgraph()
        fnb = '{}_{}_{}'.format(dataset_name, g1.graph['gid'], g2.graph['gid'])
        _plot_pairs_helper(fnb, g1, g2, pairs, node_feat_name, dataset, gid1,
                           gid2, fix_match_pos, dir, want, need_eps, mode,
                           pick_best)
def main():
    # get nearest neighbor count
    if len(sys.argv) < 2:
        print("Please enter number of nearest neighbor:")
        no_of_nearest_neighbor = input()
    else:
        no_of_nearest_neighbor = sys.argv[1]
    # load dataset
    print("Please put the dataset in the data folder (named test.txt and train.txt)")
    train_set, test_set = load_dataset()
    print("Length of train set is %d and length of test set is %d" %
          (len(train_set), len(test_set)))
    # predictions
    p = prediction(train_set, test_set)
    print("Select option- 1: Get Accuracy from test set, 2: Get Sentimental Analysis")
    option = int(input())
    if option == 1:
        print(p.get_accuracy(no_of_nearest_neighbor))
    elif option == 2:
        print('Input string to get sentiments:')
        input_data = input()
        print(p.get_sentiments(input_data, no_of_nearest_neighbor))
    else:
        print("Invalid option")
def test_cbir():
    x_train, x_test, labels = load_dataset(os.getcwd(), False)
    encoder_model = De_Conv_Autoencoder()
    encoder_model.build_auto_encoder()
    encoder_model.compile()
    encoder_model.load()
    query_img = img_as_float(cv2.cvtColor(
        cv2.resize(cv2.imread("test_img.jpg"), (124, 124)), cv2.COLOR_BGR2GRAY))
    # print(type(query_img))
    print(query_img.shape)
    img_feat_vect = extract_feat_query(encoder_model, query_img)
    predicted_cluster = cluster_query_img(img_feat_vect)
    plt.subplot(2, 1, 1)
    plt.imshow(query_img.reshape(124, 124))
    plt.gray()
    # plt.set_axis_off()
    plt.savefig("sample.jpg")  # save before show(), otherwise the figure is blank
    plt.show()
    plt.close()
    # initialize feature index
    index = FeaturesIndex()
    index.load()
    for k in index.feat_index:
        print(k, " ---> ", index.feat_index[k])
    print(predicted_cluster)
    imgs_in_cluster = index.feat_index.get(predicted_cluster[0])
    print(list(set(imgs_in_cluster)))
    print(len(list(set(imgs_in_cluster))))
    plot_sample_imgs(x_train, list(set(imgs_in_cluster)))
def main():
    train_set = []
    for f in ["train.txt", "dev.txt"]:
        file = join(dirname(dirname(dirname(__file__))), "data", "vlsp2016",
                    "corpus", f)
        train_set += load_dataset(file)
    train_set = train_set[:100]
    start = time.time()
    transformer = tagged.TaggedTransformer(template)
    X1, y1 = transformer.transform(train_set)
    end = time.time()
    py = end - start
    # py = 2.34531
    start = time.time()
    transformer = tagged_cython.TaggedTransformer(template)
    X2, y2 = transformer.transform(train_set)
    end = time.time()
    cy = end - start
    print("Python:", py)
    print("Cython:", cy)
    print("Cython is {:0.3f}x faster ^-^".format(py / cy))
def main():
    vocab, train_dataset, val_dataset, test_dataset = load_dataset(
        args.train_file, args.val_file, args.test_file)
    glove_twitter = nlp.embedding.create('glove', source=args.embedding_source,
                                         unknown_token='<unk>',
                                         init_unknown_vec=mx.nd.random_uniform)
    vocab.set_embedding(glove_twitter)
    ctx = mx.cpu()  # or mx.gpu(N) if GPU device N is available
    train_classifier(vocab, train_dataset, val_dataset, test_dataset, ctx)
def run_model(model_name):
    vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset()
    learning_rate = config.learning_rate
    batch_size = config.batch_size
    output_size = config.output_size
    hidden_size = config.hidden_size
    embedding_length = config.embedding_length
    epochs = config.epochs
    in_channels = config.in_channels
    out_channels = config.out_channels
    kernel_heights = config.kernel_heights
    stride = config.stride
    padding = config.padding
    keep_probab = config.keep_probab
    if model_name == 'CNN':
        model = CNN.CNN(batch_size, output_size, in_channels, out_channels,
                        kernel_heights, stride, padding, keep_probab,
                        vocab_size, embedding_length, word_embeddings)
    elif model_name == 'LSTM':
        model = LSTM_Attn.AttentionModel(batch_size, output_size, hidden_size,
                                         vocab_size, embedding_length,
                                         word_embeddings)
    loss_fn = F.cross_entropy
    path = "Saved Models/"
    for epoch in range(epochs):
        train_loss, train_acc = train_model(model, train_iter, epoch, loss_fn)
        val_loss, val_acc, y_test, y_pred = eval_model(model, valid_iter, loss_fn)
        _, f, o = helper.getResult(y_test, y_pred)
        current_f1 = f['f1-score']
        checkpoint_model(model, path, current_f1, epoch + 1, model_name, 'max')
        print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, '
              f'Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, '
              f'Val. Acc: {val_acc:.2f}%')
    load_saved_model(model, path + '{}_best.pth'.format(model_name))
    test_loss, test_acc, y_test, y_pred = eval_model(model, test_iter, loss_fn)
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')
    print("                Overall                #                Fake")
    print("    precision    recall    f1-score    #    precision    recall    f1-score")
    _, f, o = helper.getResult(y_test, y_pred)
    res = helper.printResult(model_name, o, f)
    print(res)
    path = model_name + "_results.txt"
    helper.saveResults(path, res)
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
    # Load the dataset into memory
    images, labels, face_num = load_dataset(self.path_name)
    self.nb_classes = face_num
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the current dimension ordering is 'th', image data is fed as
    # channels, rows, cols; otherwise as rows, cols, channels.
    # Reorder the training data according to what the Keras backend expects.
    if K.image_dim_ordering() == 'th':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss function, so the class
    # labels must be one-hot encoded according to nb_classes; with only two
    # classes here, the label data becomes two-dimensional after conversion.
    train_labels = np_utils.to_categorical(train_labels, self.nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, self.nb_classes)
    test_labels = np_utils.to_categorical(test_labels, self.nb_classes)
    # Convert pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value to the [0, 1] range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3, nb_classes=2):
    # Load the dataset into memory
    images, labels = load_dataset(self.path_name)
    train_images, valid_images, train_labels, valid_labels = train_test_split(
        images, labels, test_size=0.3, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.5, random_state=random.randint(0, 100))
    # If the current dimension ordering is 'th', image data is fed as
    # channels, rows, cols; otherwise as rows, cols, channels.
    # Reorder the training data according to what the Keras backend expects.
    # if K.image_dim_ordering() == 'th':
    if K.image_data_format() == "channels_first":  # newer Keras API
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training, validation and test sets
    print(train_images.shape[0], 'train samples')
    print(valid_images.shape[0], 'valid samples')
    print(test_images.shape[0], 'test samples')
    # The model uses categorical_crossentropy as its loss function, so the class
    # labels must be one-hot encoded according to nb_classes; with only two
    # classes here, the label data becomes two-dimensional after conversion.
    train_labels = np_utils.to_categorical(train_labels, nb_classes)
    valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
    test_labels = np_utils.to_categorical(test_labels, nb_classes)
    # Convert pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    valid_images = valid_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value to the [0, 1] range
    train_images /= 255
    valid_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.valid_images = valid_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.valid_labels = valid_labels
    self.test_labels = test_labels
def _test_helper_load(dataset_name, log_folder):
    # Load pairwise results including the node-node matching matrix.
    log_folder = join(get_model_path(), 'Our', 'logs', log_folder)
    ld = load(join(log_folder, 'final_test_pairs.klepto'))
    pairs = ld['test_data_pairs']
    print(len(pairs), 'pairs loaded')
    # Load graphs.
    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')  # TODO: check bfs assumption
    dataset.print_stats()
    natts, *_ = get_dataset_conf(dataset_name)
    # node_feat_name = natts[0] if len(natts) >= 1 else None  # TODO: only one node feat
    from node_feat import encode_node_features
    # TODO: should really load and reset flags, but since encode_node_features
    # only uses 'one_hot' it is fine for now.
    dataset, _ = encode_node_features(dataset)
    return pairs, dataset
def runFromScratch():
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)
    evaluation_dir = '/home/himanshu/Anjali/experiments/e80_20/evaluation'
    print('Loading dataset...')
    X_train, y_train, X_test, y_test = load_dataset(threshold=0.2)
    # seaborn.countplot(y_train)
    # seaborn.countplot(y_test)
    # prepare the images for the VGG model
    X_train, X_test = preprocessDataset(X_train, X_test)
    model = defineModel()
    # compile model
    optimizr = optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=optimizr, metrics=['accuracy'])
    # model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', mean_pred])
    # fit model
    numEpochs = 20
    batchSize = 12
    print('Fitting the model...')
    createLogFile(evaluation_dir)
    csv_logger = CSVLogger(path.join(evaluation_dir, 'log.csv'), append=True, separator=';')
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=numEpochs, batch_size=batchSize, verbose=1,
                        callbacks=[csv_logger])
    # save model architecture and weights
    saveModelArchWeights(model, evaluation_dir)
    # save plots to disk
    savePlots(history, evaluation_dir)
    # evaluate model & print accuracy
    evaluateModel(X_test, y_test, model)
def main():
    x_train, y_train, x_test, y_test = load_dataset()
    print('Training data shape: ', x_train.shape, ' Train labels shape: ', y_train.shape)
    print('Test data shape: ', x_test.shape, ' Test labels shape: ', y_test.shape)
    print()
    classifier = SoftmaxClassifier()
    loss, grad = classifier.cross_entropy_loss(x_train, y_train, 1e-5)
    # Gradient check for the model
    f = lambda w: classifier.cross_entropy_loss(x_train, y_train, 0.0)[0]
    print('Gradient Check:')
    grad_check(f, classifier.W, grad, 10)
    print()
    # Plot the loss for the training
    loss_record = classifier.train(x_train, y_train, lr=1e-6, reg=1e4)
    plt.plot(loss_record)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()
    # Evaluation on test set
    y_test_pred = classifier.predict(x_test)
    accuracy = np.mean(y_test == y_test_pred)
    print('Accuracy of the Softmax classifier on the test set: %f' % accuracy)
    # Visualize the learned weights for each class
    w = classifier.W[:, :-1]  # Strip out the bias
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        # Rescale the weights to be between 0 and 255 for image representation
        w_img = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(w_img.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()
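# grad_check is called above but not defined in this snippet. A common
# implementation, shown here as an assumed sketch rather than the original,
# compares the analytic gradient against a centered finite difference at a few
# randomly chosen coordinates of W (which the loss closure f reads in place).
import numpy as np

def grad_check(f, W, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        idx = tuple(np.random.randint(d) for d in W.shape)
        old = W[idx]
        W[idx] = old + h
        f_plus = f(W)
        W[idx] = old - h
        f_minus = f(W)
        W[idx] = old  # restore the original weight
        numeric = (f_plus - f_minus) / (2 * h)
        analytic = analytic_grad[idx]
        rel_err = abs(numeric - analytic) / max(1e-12, abs(numeric) + abs(analytic))
        print('numerical: %f analytic: %f, relative error: %e' % (numeric, analytic, rel_err))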
def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
    # Load the dataset
    images, labels, face_num = load_dataset(self.path_name)
    self.nb_classes = face_num
    # Split training and test sets following the cross-validation convention
    train_images, _, train_labels, _ = train_test_split(
        images, labels, test_size=0.1, random_state=random.randint(0, 100))
    _, test_images, _, test_labels = train_test_split(
        images, labels, test_size=0.1, random_state=random.randint(0, 100))
    # Check the backend (TensorFlow/Theano) and reorder the training data accordingly
    if K.image_data_format() == 'channels_first':
        train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
        test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
        self.input_shape = (img_channels, img_rows, img_cols)
    else:
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
        self.input_shape = (img_rows, img_cols, img_channels)
    # Print the sizes of the training and test sets
    print(train_images.shape[0], 'train samples')
    print(test_images.shape[0], 'test samples')
    # One-hot encode the class labels according to the number of classes nb_classes
    train_labels = np_utils.to_categorical(train_labels, self.nb_classes)
    test_labels = np_utils.to_categorical(test_labels, self.nb_classes)
    # Convert pixel data to float so it can be normalized
    train_images = train_images.astype('float32')
    test_images = test_images.astype('float32')
    # Normalize each pixel value to the [0, 1] range
    train_images /= 255
    test_images /= 255
    self.train_images = train_images
    self.test_images = test_images
    self.train_labels = train_labels
    self.test_labels = test_labels
def main(): """Defines an application's main functionality""" log.info("Started.") base_path = Path(config.base_dir) if base_path.exists() is False: base_path.mkdir(exist_ok=True) (x_train, y_train), (x_test, y_test), (word2id, id2word), (tag2id, id2tag) = load_dataset( config.corpus, test_ratio=0.1, data_dir=config.base_dir) voc_size = len(word2id) num_tags = len(id2tag) y_train_oh = keras.utils.np_utils.to_categorical(y_train, num_tags) y_test_oh = keras.utils.np_utils.to_categorical(y_test, num_tags) log.info("Data information") log.info("Size of training set: %d" % (x_train.shape[0])) log.info("Shape of training set: %s" % (repr(x_train.shape))) log.info("Size of test set: %d" % (x_test.shape[0])) log.info("Number of unique wordss: %d" % (len(word2id))) log.info("Number of unique tags: %d" % (num_tags)) log.info("Weights path: %s" % (config.weights_path)) if config.use_embedding is False: model = model_architecture.build_model(num_tags) else: model = model_architecture.build_model_with_embedding( num_tags, voc_size, config.sample_dimension) model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['categorical_accuracy']) model.fit(x=x_train, y=y_train_oh, validation_data=(x_test, y_test_oh), batch_size=128, epochs=config.epochs, verbose=1) # Use progress bar model.save_weights(config.weights_path)
def build_index():
    de_conv_encoder = De_Conv_Autoencoder()
    de_conv_encoder.build_auto_encoder()
    de_conv_encoder.compile()
    de_conv_encoder.load()
    x_train, x_test, labels = load_dataset(os.getcwd(), False)
    feat_vect_list = []
    # for i in range(len(x_train)):
    feat_vect_list = extract_feature(de_conv_encoder, x_train)
    n_clusters = 100
    km_model = trigger_clustering(labels, np.array(feat_vect_list), count=n_clusters)
    Index_histogram(km_model, n_clusters)
def set_up(filename, melt=True):
    data = load_data.load_dataset(filename)
    col_names = {0: "Fz", 1: "C3", 2: "Cz", 3: "C4",
                 4: "CP1", 5: "CPz", 6: "CP2", 7: "Pz"}
    df = pd.DataFrame.from_dict(data["y"])
    df.rename(columns=col_names, inplace=True)
    df_trig = pd.DataFrame.from_dict(data["trig"])
    df_trig.rename(columns={0: "trigger"}, inplace=True)
    df_full = pd.concat([df_trig, df.reindex(df.index)], axis=1)
    df_full.reset_index(inplace=True)
    df_full.rename(columns={'index': 'time'}, inplace=True)
    df_full['filename'] = filename
    df_full['subject'] = filename[0:2]
    df_full['condition'] = filename[3:].replace(".mat", "")
    df_full['data_quality'] = df_full['condition'].apply(
        lambda x: 'high' if 'high' in x else 'low')
    triggers = return_trig_dict(data['trig'])
    df_full['seq_type'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_type'))
    df_full['seq_index'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_index'))
    df_full['seq_start'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_start'))
    df_full['seq_time'] = df_full['time'] - df_full['seq_start']
    if melt:
        df_full = pd.melt(
            df_full,
            id_vars=['time', 'trigger', 'filename', 'subject', 'condition',
                     'data_quality', 'seq_type', 'seq_index', 'seq_start',
                     'seq_time'],
            value_vars=['Fz', 'C3', 'Cz', 'C4', 'CP1', 'CPz', 'CP2', 'Pz'],
            var_name='electrode', value_name='amplitude')
    return df_full
def main():
    load_checkpoint = "./bin/2021-Apr-21-20-53-26-lstm2/E9.pytorch"
    batch_size = 32
    output_size = 2
    hidden_size = 256
    embedding_length = 300
    num_samples = 1
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset()
    model = LSTMClassifier2(batch_size, output_size, hidden_size, vocab_size,
                            embedding_length, word_embeddings)
    if not os.path.exists(load_checkpoint):
        raise FileNotFoundError(load_checkpoint)
    model.load_state_dict(torch.load(load_checkpoint))
    print("Model loaded from %s" % load_checkpoint)
    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()
    samples, z = model.inference(n=num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=TEXT.vocab.itos, pad_idx=model.pad_idx), sep='\n')
    exit()  # NOTE: the interpolation code below is unreachable in the original
    z1 = torch.randn([model.latent_size]).numpy()
    z2 = torch.randn([model.latent_size]).numpy()
    z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=TEXT.vocab.itos, pad_idx=model.pad_idx), sep='\n')
def main(train_data_path: str, model_path: str):
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(
        train_data_path)
    batch_size = 32
    output_size = 2
    hidden_size = 256
    embedding_length = 300
    # TODO: try other types of learning algorithms
    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)
    for epoch in range(10):
        train_loss, train_acc = train_model(model, train_iter, epoch)
        val_loss, val_acc = eval_model(model, valid_iter)
        print(f'Epoch: {epoch + 1:02}, Train Loss: {train_loss:.3f}, '
              f'Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, '
              f'Val. Acc: {val_acc:.2f}%')
    test_loss, test_acc = eval_model(model, test_iter)
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')
    # Now predict the sentiment of a single sentence, just for testing purposes.
    test_sen1 = ("This is one of the best creation of Nolan. I can say, it's his "
                 "magnum opus. Loved the soundtrack and especially those creative dialogues.")
    test_sen1 = TEXT.preprocess(test_sen1)
    test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]
    test_sen = np.asarray(test_sen1)
    test_sen = torch.from_numpy(test_sen)
    if torch.cuda.is_available():
        test_sen = test_sen.cuda()
    model.eval()
    output = model(test_sen, 1)
    out = F.softmax(output, 1)
    if torch.argmax(out[0]) == 1:
        print("Sentiment: Positive")
    else:
        print("Sentiment: Negative")
    # save the model
    torch.save(model.state_dict(), model_path)
def runSavedModel():
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)
    evaluation_dir = '/home/himanshu/Anjali/experiments/e10fold/evaluation'
    print('Loading dataset...')
    X, y = load_dataset()
    # seaborn.countplot(y_train)
    # seaborn.countplot(y_test)
    # prepare the images for the ResNet50 model; the original code referenced
    # X_train/X_test before defining them, so here (as an assumption) the whole
    # loaded set is preprocessed and treated as the test set for evaluation.
    _, X_test = preprocessDataset(X, X)
    y_test = y
    # load the model architecture and weights
    model = loadModelArchWeights(evaluation_dir)
    # compile model
    optimizr = optimizers.Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizr, metrics=['accuracy'])
    # model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', mean_pred])
    # # fit model
    # numEpochs = 15
    # batchSize = 6
    # print('Fitting the model...')
    # createLogFile(evaluation_dir)
    # csv_logger = CSVLogger(path.join(evaluation_dir, 'log.csv'), append=True, separator=';')
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
    #                     epochs=numEpochs, batch_size=batchSize, verbose=1, callbacks=[csv_logger])
    # # save model architecture and weights
    # saveModelArchWeights(model, evaluation_dir)
    # # save plots to disk
    # savePlots(history, evaluation_dir)
    # evaluate model & print accuracy
    [y_predicted, y_probability, scores] = evaluateModel(X_test, y_test, model)
    # save predictions to disk
    savePredictions2disk(y_test, y_predicted, y_probability, evaluation_dir)
def rerun_from_loaded_logs(dataset_name, log_folder, theta):
    from utils import get_model_path, load
    from load_data import load_dataset
    from pprint import pprint
    print('theta {}'.format(theta))
    log_folder = join(get_model_path(), 'Our', 'logs', log_folder)
    ld = load(join(log_folder, 'final_test_pairs.klepto'))
    pairs = ld['test_data_pairs']
    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')
    # regenerate y_true_dict_list
    for gids in pairs.keys():
        gid1, gid2 = gids
        g1 = dataset.look_up_graph_by_gid(gid1)
        g2 = dataset.look_up_graph_by_gid(gid2)
        pair_true = dataset.look_up_pair_by_gids(gid1, gid2)
        pair = pairs[gids]
        pair.assign_g1_g2(g1, g2)
        pair.assign_y_true_dict_list(pair_true.get_y_true_list_dict_view())
    # construct flags
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--only_iters_for_debug', type=int, default=None)
    parser.add_argument('--dataset', default=dataset_name)
    parser.add_argument('--align_metric', default='mcs')
    parser.add_argument('--theta', type=float, default=theta)
    parser.add_argument('--debug', type=bool, default='debug' in dataset_name)
    FLAGS = parser.parse_args()
    # call prediction code
    pair_list = [pairs[gids] for gids in pairs.keys()]
    global_result = eval_pair_list(pair_list, FLAGS)
    pprint(global_result)
    fn = join(log_folder, 'updated_results_theta_{}.txt'.format(theta))
    with open(fn, 'w') as f:
        pprint(global_result, stream=f)
def main(args):
    TEXT, LABEL, vocab_size, word_embeddings, train_iter, valid_iter = load_data.load_dataset(args)
    # learning_rate = 2e-5
    learning_rate = 0.00001
    batch_size = BATCH_SIZE
    output_size = 2
    # hidden_size = 256
    hidden_size = 64
    embedding_length = 200
    # model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
    model = AttentionModel(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)
    # model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
    # loss_fn = F.cross_entropy
    print(LABEL.vocab.stoi)
    print(LABEL.vocab.freqs)
    print(LABEL)
    label_weights = torch.FloatTensor(np.asarray([1.0, 2.0]))
    label_weights_tensor = Variable(label_weights, volatile=True).cuda()
    loss_fn = torch.nn.CrossEntropyLoss(weight=label_weights_tensor)
    # loss_fn = torch.nn.CrossEntropyLoss()
    for epoch in range(10):
        train_loss, train_acc = train_model(model, loss_fn, train_iter, epoch)
        val_loss, val_acc = eval_model(model, loss_fn, valid_iter)
        print('Epoch: %d, Train Loss: %.3f, Train Acc: %.2f, Val Loss: %.3f, Val Acc: %.2f'
              % (epoch + 1, train_loss, train_acc, val_loss, val_acc))
        evaluate(model, TEXT, LABEL, args.train_data_tsv_file, epoch)
        evaluate(model, TEXT, LABEL, args.val_data_tsv_file, epoch)
        # torch.save(model.state_dict(), args.save_model_file + '.epoch' + str(epoch + 1))
    test_loss, test_acc = eval_model(model, loss_fn, valid_iter)
    print('Test Loss: %.3f, Test Acc: %.2f' % (test_loss, test_acc))
def fileopen(self):
    global v1, clean_review, train, test
    filename = fdialog.askopenfilename(filetypes=(("TSV Files", "*.tsv"),
                                                  ("All Files", "*.*")))
    name = filename
    path = StringVar()
    path.set(name)
    self.Path_Field.configure(textvariable=path)
    try:
        file = io.StringIO()
        with redirect_stdout(file):
            obj_load = load_dataset(name)
            df = obj_load.load()
            obj_review = praproses(df['review'])
            df['review'] = obj_review.cleaning()
            output = file.getvalue()
        obj_split = fold(df)
        train, test = obj_split.train_test()
        self.Scrolledtext1.configure(state='normal')
        self.Scrolledtext1.insert(END, output)
        self.Scrolledtext1.configure(state='disabled')
        v1 = IntVar()
        self.BOW.configure(state='normal', variable=v1, value=1, command=self.vsm)
        self.D2V.configure(state='normal', variable=v1, value=2, command=self.vsm)
    except Exception:
        self.Scrolledtext1.configure(state='normal')
        # "Tidak bisa membaca file yang dimuat." = "The loaded file could not be read."
        self.Scrolledtext1.insert(END, "Tidak bisa membaca file yang dimuat.\n")
        self.Scrolledtext1.configure(state='disabled')
import os
import time
import theano
from theano import tensor as T
import numpy as np
from load_data import load_dataset
import lasagne
import matplotlib.pyplot as plt

# print("Loading data...")
# X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
# print(X_train.shape)
# print(y_train.shape)
# print(X_val.shape)
# print(y_val.shape)
# print(X_test.shape)
# print(y_test.shape)

print("Loading data...")
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

plt.figure(figsize=(12, 3))
for i in range(10):
    plt.subplot(1, 10, i + 1)
    plt.imshow(X_train[i].reshape((28, 28)), cmap='gray', interpolation='nearest')
    plt.axis('off')
plt.savefig('digits.png', bbox_inches='tight')
import warnings
from os.path import join, dirname
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from load_data import load_dataset
from model import ClassifierModel

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    train_path = join(dirname(dirname(__file__)), "data", "train.xlsx")
    test_path = join(dirname(dirname(__file__)), "data", "test.xlsx")
    X_train, y_train = load_dataset(train_path)
    X_test, y_test = load_dataset(test_path)
    models = [
        ClassifierModel("Tfidf Bigram", TfidfVectorizer(ngram_range=(1, 2))),
        ClassifierModel("Tfidf Trigram", TfidfVectorizer(ngram_range=(1, 3))),
        ClassifierModel("Count Bigram", CountVectorizer(ngram_range=(1, 2))),
        ClassifierModel("Count Trigram", CountVectorizer(ngram_range=(1, 3)))
    ]
    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Count Max Feature {}".format(n),
                                CountVectorizer(max_features=n))
        models.append(model)
    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Count Max Feature {}".format(n),
def main(): parser = argparse.ArgumentParser() parser.add_argument("model", help="model name", choices=['cifar', 'lenet']) parser.add_argument('-n', '--num-epochs', type=int, default=20) parser.add_argument('-f', '--model-file', help="model file") parser.add_argument('--no-separate', help='split the data', action='store_true') parser.add_argument('--second-part', help='take second part of data instead of the first', action='store_true') parser.add_argument('-b', '--batch-size', type=int, default=64) parser.add_argument('-l', '--learning-rate', type=float, default=0.01) parser.add_argument('-t', '--test-only', action='store_true') parser.add_argument('-T', '--train-from-layer', help='only train on this layer and those layers after it, \ don\'t update weights of layers before this layer') parser.add_argument('-p', '--prefix', help='prefix to add at the beginning of model save file') args = parser.parse_args() model = args.model batch_size = args.batch_size separate = not args.no_separate model_file = args.model_file num_epochs = args.num_epochs learning_rate = args.learning_rate save_file_name = model + '_model' test_only = args.test_only load_first_part = not args.second_part train_from_layer = args.train_from_layer prefix = args.prefix if test_only and not model_file: print('you need to specify a model file to test') exit() if separate: if load_first_part: save_file_name = 'firsthalf_' + save_file_name else: save_file_name = 'secondhalf_' + save_file_name nOutput = 5 else: nOutput = 10 if train_from_layer: save_file_name = 'from_' + train_from_layer + save_file_name if prefix: save_file_name = prefix + save_file_name else: save_file_name = str(random.randint(10000, 99999)) + '_' + save_file_name logfile = save_file_name + '_log.txt' log_print = functools.partial(log_and_print, logfile=logfile) log_print('--Parameter--') log_print(' model={}'.format(model)) log_print(' batch_size={}'.format(batch_size)) log_print(' num_epochs={}'.format(num_epochs)) log_print(' learning_rate={}'.format(learning_rate)) log_print(' separate data :{}'.format(separate)) if separate: s = ' take first or second part of data :' + ('first' if load_first_part else 'second') log_print(s) log_print(' model_file :{}'.format(model_file)) log_print(' nOutput = {}'.format(nOutput)) log_print(' model will be saved to : {}'.format(save_file_name + '*.npz')) log_print(' log will be saved to : {}'.format(logfile)) log_print(' test only :{}'.format(test_only)) log_print(' only train from this layer : {}'.format(train_from_layer)) log_print(' prefix to save file : {}'.format(prefix)) log_print('') log_print("Loading data...") X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part) log_print('{} train images'.format(len(X_train))) log_print('{} val images'.format(len(X_val))) log_print('{} test images'.format(len(X_test))) # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') target_var = T.ivector('targets') # Create neural network model (depending on first command line parameter) log_print("Building model and compiling functions...") net, net_output = model_io.load_model(model, model_file, nOutput, input_var) prediction = lasagne.layers.get_output(net_output) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() if train_from_layer: layers_to_train = lasagne.layers.get_all_layers(net_output, treat_as_input=[net[train_from_layer]]) params = get_all_params_from_layers(layers_to_train, trainable=True) else: params 
= lasagne.layers.get_all_params(net_output, trainable=True) updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9) test_prediction = lasagne.layers.get_output(net_output, deterministic=True) test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var) test_loss = test_loss.mean() test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) train_fn = theano.function([input_var, target_var], loss, updates=updates) val_fn = theano.function([input_var, target_var], [test_loss, test_acc]) if not test_only: log_print("Starting training...") for epoch in range(num_epochs): train_err = 0 train_batches = 0 start_time = time.time() print("Training stage:") for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=True): time_batch = time.time() inputs, targets = batch this_train_err = train_fn(inputs, targets) train_err += this_train_err train_batches += 1 print('train batch', train_batches, 'err+=', this_train_err, '{:.2f}'.format(time.time() - time_batch), 'seconds') val_err = 0 val_acc = 0 val_batches = 0 print("Validation stage ..") for batch in load_data.iterate_minibatches(X_val, y_val, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 # Then we print the results for this epoch: log1 = "Epoch {} of {} took {:.3f}m".format(epoch + 1, num_epochs, (time.time() - start_time) / 60.) log2 = " training loss:\t\t{:.6f}".format(train_err / train_batches) log3 = " validation loss:\t\t{:.6f}".format(val_err / val_batches) log4 = " validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100) log_print(log1) log_print(log2) log_print(log3) log_print(log4) # Optionally, you could now dump the network weights to a file like this: model_file = save_file_name + str(epoch) + '.npz' log_print('model saved to ' + model_file) model_io.save_model(model_file, net_output) log_print('testing network ...') # After training, we compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) test_err += err test_acc += acc test_batches += 1 log_print("Final results:") log_print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches)) log_print(" test accuracy:\t\t{:.2f} %".format( test_acc / test_batches * 100))
import load_data as ld import pandas as pd import numpy as np import matplotlib.pyplot as plt import ipdb import matplotlib matplotlib.style.use('ggplot') csvfile = "../Exercise.xlsx" df = ld.load_dataset(csvfile) list_df = ld.separate_diet(df) for diet in list_df: mean_time = diet.mean(axis=1).plot(label="Diet"+str(np.unique(diet["Diet"]))) plt.legend() plt.savefig("mean.pdf") plt.figure() for diet in list_df: diet_string = str(np.unique(diet["Diet"])[0]) diet.to_csv("chicken_weights_"+diet_string+".csv") weights_colnames = list(diet.columns.values) weights_colnames.remove('Diet') diet = diet.reset_index() plt.figure() ax = diet.plot(kind="scatter", x="Time", y=0)
def main(): """ Main function of the project. """ args = init_parser().parse_args() random.seed(490) dict_info = {} # Problem's definition dict_info['depot'] = model.Point(args.depot[0], args.depot[1]) width, height = 300, 300 ind_size = args.vehicle * args.node dict_info['zoom'] = args.zoom # Genetic parameter crossover_probability = args.crossover mutation_probability = args.mutation ngen = args.generation _mu = args.size _lambda = args.size # Generate a the problem's data set # i.e: Generate N "route" of appointement #list_appointment = model.generate_route(num_route, # num_node_per_route, # width, # height, # dict_info['depot']) dict_info['data'] = load_data.load_dataset(args.path) # Set the routes color dict_info['color'] = visualisation.color_group(args.vehicle) toolbox = init_toolbox( ind_size, args.vehicle, dict_info['data'], dict_info['depot']) # Create the global population # And an elite one pop = toolbox.population(n=args.size) hof = tools.HallOfFame(args.elite) # Create a statistic module to display stats at each generation stats = tools.Statistics(lambda ind: ind.fitness.values) stats.register("avg", numpy.mean, axis=0) stats.register("std", numpy.std, axis=0) stats.register("min", numpy.min, axis=0) stats.register("max", numpy.max, axis=0) root = visualisation.Tk() root.geometry(str(width) + "x" + str(height)) # The genetic algorithm in itself algorithms.eaMuPlusLambda(pop, toolbox, _mu, _lambda, crossover_probability, mutation_probability, ngen, stats=stats, halloffame=hof) dict_info['tour'] = visualisation.individual_as_appointment( hof[0], dict_info['data']['appointment'] ) # Create display of the problem and of the best solution visualisation.Example(root, dict_info) # Start the GUI main loop root.mainloop()
def main(): parser = argparse.ArgumentParser() parser.add_argument("model", help="model name", choices=['cifar', 'lenet']) parser.add_argument("model_file", help="model file") parser.add_argument('layer', help='layer name to get output') parser.add_argument('--no-separate', help='split the data', action='store_true') parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true') parser.add_argument('-b', '--batch-size', type=int, default=64) parser.add_argument('-n', '--data-num', type=int) args = parser.parse_args() model = args.model batch_size = args.batch_size separate = not args.no_separate model_file = args.model_file layer_name = args.layer load_first_part = args.first_part data_num = args.data_num filename = model + '_' + layer_name + '_output.save' print('--Parameters--') print(' model : ', model) print(' layer name : ', layer_name) print(' batch_size : ', batch_size) print(' model_file : ', model_file) print(' middle output will be saved to : ', filename) print(' separate data :', separate) if separate: print(' take first or second part of data :', 'first' if load_first_part else 'second') print('batch_size=', batch_size) if separate: nOutput = 5 else: nOutput = 10 # Load the dataset print("Loading data...") X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part) if data_num: X_train = X_train[:data_num] y_train = y_train[:data_num] X_val = X_val[:data_num] y_val = y_val[:data_num] X_test = X_test[:data_num] y_test = y_test[:data_num] print(len(X_train), 'train images') print(len(X_val), 'val images') print(len(X_test), 'test images') # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') # Create neural network model (depending on first command line parameter) print("Building model and compiling functions...") net, net_output = model_io.load_model(model, model_file, nOutput, input_var) # middle_output = theano.function([input_var], net[layer_name].output) print("Getting middle output...") output = lasagne.layers.get_output(net[layer_name]) get_output = theano.function([input_var], output.flatten(2)) output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name])) print('layer ' + layer_name + ' shape :', output_shape) all_train_output = [] all_train_y = [] all_test_output = [] all_test_y = [] print('getting from train') for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=False): print('.', end='', flush=True) inputs, targets = batch batch_output = get_output(inputs) # a numpy ndarray all_train_output.extend(batch_output.tolist()) all_train_y.extend(targets.tolist()) print() print('getting from test') for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False): print('.', end='', flush=True) inputs, targets = batch batch_output = get_output(inputs) # a numpy ndarray all_test_output.extend(batch_output.tolist()) all_test_y.extend(targets.tolist()) print() print("train output shape : ", np.array(all_train_output).shape) print("train y shape : ", np.array(all_train_y).shape) print("test output shape : ", np.array(all_test_output).shape) print("test y shape : ", np.array(all_test_y).shape) with open(filename, 'wb') as f: pickle.dump([all_train_output, all_train_y, all_test_output, all_test_y], f, protocol=pickle.HIGHEST_PROTOCOL) print('... saved to ', filename)
        lasagne.layers.dropout(network, p=.5),
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify)
    # And, finally, the 6-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(network, p=.5),
        num_units=6,
        nonlinearity=lasagne.nonlinearities.softmax)
    return network


if __name__ == '__main__':
    print("Loading Data")
    X_train, y_train, X_valid, y_valid, X_test, y_test = load_data.load_dataset("/home/prosurpa/Image/image/")

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building Model")
    batch_size = 1
    network = build_simple_cnn(batch_size, input_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    else:
        tile_shape.append(1)
    # print tile_shape
    if untiled_param is not None:
        # use the untiled param to recover the updates
        sum_of_updates = T.sum(param - untiled_param, tying_dims, keepdims=1)
        updated_untiled_param = sum_of_updates + untiled_param
    else:
        # just take the mean value of the parameters
        # (i.e. averaging instead of summing the updates)
        updated_untiled_param = T.mean(param, tying_dims, keepdims=1)
    updated_param = T.tile(updated_untiled_param, tile_shape, ndim=len(dims_shared))
    return updated_param, updated_untiled_param


#############
# LOAD DATA #
#############
train_x, train_y, test_x, test_y, input_shape = load_dataset(dataset)
input_shape = (1, input_shape[2], input_shape[0], input_shape[1], batchsize)  # reshaped for locally_connected layers

if use_10percent_of_dataset:
    train_x *= 256
    test_x *= 256
    nex = 5000
    ntest = nex / 10
else:
    nex = 50000
    ntest = 10000

print "training on " + str(nex) + " examples"
train_x = train_x[:nex]
test_x = test_x[:ntest]
train_y = train_y[:nex]
import load_data as ld import pandas as pd import numpy as np import matplotlib.pyplot as plt import ipdb import matplotlib matplotlib.style.use('ggplot') csvfile = "../Exercise.xlsx" df_raw = ld.load_dataset(csvfile) group = df_raw.groupby("Diet") for i in xrange(1,5): df = group.get_group(i) x = df["Time"] y = df["Weight"] results = np.polyfit(x,y,1) print "For diet:", i, "slope=", results[0] xx = np.arange(0, 25) yy = results[1] + results[0]*xx plt.figure() df.plot(kind="Scatter", x="Time", y="Weight") plt.plot(xx,yy) plt.show()
from os.path import join, dirname
import time
import joblib
import pycrfsuite
from sklearn_crfsuite import metrics
from load_data import load_dataset

transformer = joblib.load(join(dirname(__file__), "model", "transformer.bin"))
path = join(dirname(__file__), "model", "model.bin")
estimator = pycrfsuite.Tagger()
estimator.open(path)

test_set = load_dataset(join(dirname(dirname(dirname(__file__))),
                             "data", "vlsp2016", "corpus", "test.txt"))
X_test, y_test = transformer.transform(test_set)

start = time.time()
y_pred = [estimator.tag(x) for x in X_test]
end = time.time()
test_time = end - start

f1_test_score = metrics.flat_f1_score(y_test, y_pred, average='weighted')
print("F1 score: ", f1_test_score)
print("Test time: ", test_time)

with open("report.txt", "w") as f:
    f.write("F1 score: " + str(f1_test_score) + "\n" + "Test time: " + str(test_time))
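# Optional extension (not part of the original script): sklearn_crfsuite.metrics
# also provides a flattened per-label classification report, which fits the same
# y_test / y_pred structures used above; filtering out the 'O' tag is an assumption.
labels = sorted({tag for sent in y_test for tag in sent} - {"O"})
report = metrics.flat_classification_report(y_test, y_pred, labels=labels, digits=3)
print(report)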
def main(): parser = argparse.ArgumentParser() parser.add_argument("model", help="model name", choices=['cifar', 'lenet']) parser.add_argument("model_file", help="model file") parser.add_argument('layer', help='layer name to get image output') parser.add_argument('imageID', help='ID of image for input', type=int) parser.add_argument('-d', '--dataset', choices=['train', 'val', 'test'], default='test') parser.add_argument('--no-separate', help='split the data', action='store_true') parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true') parser.add_argument('-i', '--input', help='only get input image', action='store_true') parser.add_argument('-w', '--draw-weights', help='only draw weights, give the width of kernel', action='store_true') args = parser.parse_args() model = args.model batch_size = 1 separate = not args.no_separate model_file = args.model_file layer_name = args.layer chosen_set = args.dataset load_first_part = args.first_part imageID = args.imageID only_input = args.input only_weights = args.draw_weights if not only_weights: filename = str(imageID) + '_' + model + '_' + layer_name + '_output.png' else: filename = 'weight_' + model + '_' + layer_name + '_output.png' print('--Parameters--') print(' model : ', model) print(' layer name : ', layer_name) print(' batch_size : ', batch_size) print(' model_file : ', model_file) print(' middle output images will be saved to : ', filename) print(' separate data :', separate) if separate: print(' take first or second part of data :', 'first' if load_first_part else 'second') print('batch_size=', batch_size) if separate: nOutput = 5 else: nOutput = 10 # Load the dataset print("Loading data...") if not only_weights: if only_input: X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part, substract_mean=False) else: X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part) print(len(X_train), 'train images') print(len(X_val), 'val images') print(len(X_test), 'test images') print('getting from' + chosen_set) if chosen_set == 'train': X_set = X_train y_set = y_train elif chosen_set == 'val': X_set = X_val y_set = y_val else: X_set = X_test y_set = y_test if only_input: image_data = X_set[imageID] if model == 'cifar': image_data = image_data.reshape((3, 32, 32)) image_data = np.rollaxis(image_data, 0, 3) # 3 32 32 to 32 32 3 else: image_data = image_data.reshape((28, 28)) image_data *= 255 image_data = image_data.astype('uint8') image = Image.fromarray(image_data) image.save(filename) print('image saved to :', filename) exit() # Prepare Theano variables for inputs and targets input_var = T.tensor4('inputs') # Create neural network model (depending on first command line parameter) print("Building model and compiling functions...") net, net_output = model_io.load_model(model, model_file, nOutput, input_var) if not only_weights: print("Getting middle output...") output = lasagne.layers.get_output(net[layer_name]) get_output_image = theano.function([input_var], output.flatten(3)) output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name])) foo, nKernel, h, w = output_shape print('layer ' + layer_name + ' shape :', output_shape) batch_output = get_output_image(np.array([X_set[imageID]])) images_output = batch_output[0] prediction = lasagne.layers.get_output(net_output) get_pred = theano.function([input_var], prediction) pred = get_pred(np.array([X_set[imageID]])) else: if model == 'cifar': 
weights = net[layer_name].W.get_value() print('weights shape :', weights.shape) nKernel, foo, h, w = weights.shape assert foo == 3 flatten_w = net[layer_name].W.flatten(3) images_output = flatten_w.eval() images_output = np.rollaxis(images_output, 1, 0) # nKernel 3 w*h to 3 nKernel w*h print('flatten weights shape :', images_output.shape) else: weights = net[layer_name].W.get_value() print('weights shape :', weights.shape) nKernel, foo, h, w = weights.shape assert foo == 1 flatten_w = net[layer_name].W.flatten(2) images_output = flatten_w.eval() print('flatten weights shape :', images_output.shape) width = 1 while width * width < nKernel: width += 1 if width * width > nKernel: if images_output.ndim == 2: images_output = np.concatenate((images_output, np.zeros((width * width - nKernel, w * h))), axis=0) elif images_output.ndim == 3: images_output = np.concatenate((images_output, np.zeros((3, width * width - nKernel, w * h))), axis=1) else: assert False image = Image.fromarray(tile_raster_images( X=images_output, # chose batch 0 img_shape=(h, w), tile_shape=(width, width), tile_spacing=(1, 1))) image.save(filename) print('image saved to :', filename)