def file_upload():
    if request.method == 'POST':
        global input_data
        # Make sure the upload directory exists before saving the file.
        target = os.path.join(UPLOAD_FOLDER, 'uploads')
        check_dir(target)
        print("welcome to upload")
        # NOTE: file.filename is user-controlled; consider sanitizing it,
        # e.g. with werkzeug's secure_filename.
        file = request.files['file']
        destination = os.path.join(target, file.filename)
        print(f"filename : {file.filename}")
        file.save(destination)
        # Run the project's preprocessing step, then load its pickled output
        # into the module-level `input_data` used elsewhere.
        preprocess(filename=file.filename)
        input_data = load_pickle_data('./data/output/sample_output.pkl')
        return jsonify({'result': 'success'})
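# A minimal sketch of how this handler might be wired into a Flask app. The
# route path, UPLOAD_FOLDER value, and check_dir helper are assumptions, not
# taken from the snippet above.
import os
from flask import Flask, request, jsonify

app = Flask(__name__)
UPLOAD_FOLDER = './data/input'  # hypothetical location
input_data = None

def check_dir(path):
    # Create the directory (and any parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)

# Register the handler defined above under a hypothetical /upload endpoint.
app.add_url_rule('/upload', 'file_upload', file_upload, methods=['POST'])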
def prepare_data(path, word2idx, num_threads=8, **opts):
    with tf.device("/cpu:0"):
        enqueue_data, dequeue_batch = get_input_queues(
            path, word2idx, batch_size=opts["batch_size"], num_threads=num_threads)
        # TODO: put this logic somewhere else
        input_ph = tf.placeholder_with_default(dequeue_batch, (None, None))
        source, target, sequence_length = preprocess(input_ph)
    return enqueue_data, input_ph, source, target, sequence_length
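# For context: tf.placeholder_with_default yields its default tensor when no
# value is fed and behaves like a regular placeholder when one is, which is
# how the function above lets a feed override the input queue. A minimal,
# self-contained TF1-style sketch (the names here are illustrative):
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

default_batch = tf.constant([[1, 2], [3, 4]], dtype=tf.int64)
batch_ph = tf.placeholder_with_default(default_batch, shape=(None, None))

with tf.Session() as sess:
    print(sess.run(batch_ph))                                  # uses the default batch
    print(sess.run(batch_ph, feed_dict={batch_ph: [[9, 9]]}))  # uses the fed value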
def main(args):
    torch.manual_seed(args.seed)
    model = Model()
    model.cuda()
    train_loader, verification_loader = preprocess(args.data_dir)
    train(model, train_loader, verification_loader, args.epochs,
          args.train_batch_sz, args.verification_batch_sz, args.save_path)
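# A hedged sketch of the argument parser this main() expects. The flag names
# are inferred from the attributes used above; the defaults are assumptions.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--data-dir', default='./data')
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--train-batch-sz', type=int, default=32)
    parser.add_argument('--verification-batch-sz', type=int, default=32)
    parser.add_argument('--save-path', default='./checkpoints')
    main(parser.parse_args())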
def main(args):
    # Device configuration
    device = torch.device(
        'cuda:{}'.format(args.gpu) if torch.cuda.is_available() else 'cpu')
    num_epochs = 80
    num_classes = 8
    learning_rate = 0.08
    num_views = 3
    num_layers = 4
    data_path = args.dir
    file_list = [
        './data/train_web_content.npy', './data/train_web_links.npy',
        './data/train_web_title.npy', './data/test_web_content.npy',
        './data/test_web_links.npy', './data/test_web_title.npy',
        './data/train_label.npy', './data/test_label.npy'
    ]
    # Pre-process the raw data only if any of the expected .npy files is missing.
    if not all(map(os.path.exists, file_list)):
        print('Raw data has not been pre-processed! Start pre-processing the raw data.')
        data_loader.preprocess(data_path)
    else:
        print('Loading the existing data set...')

    train_dataset = data_loader.Load_datasets('train', num_classes)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,
                              num_workers=4)
    input_dims = np.array(train_dataset.data[0]).shape
    model = CNN_Text(input_dims, [64, 32, 32, 32], [1, 2, 3, 4], num_classes,
                     0.5, num_layers, num_views).to(device)
    model = model.double()
    model.device = device
    model.learning_rate = learning_rate
    model.epoch = 0
    # Optionally resume from a pre-trained checkpoint, remapping tensors to
    # the active device in case the checkpoint was saved on a GPU machine.
    if args.model is not None:
        model.load_state_dict(torch.load(args.model, map_location=device))
        print('Successfully loaded pre-trained model!')
    # Train the model to completion, then evaluate it.
    train_model(model, train_loader, num_epochs)
    print('Finished training process!')
    evaluation(model)
def recognize(self, audio_path):
    preprocessed_audio = preprocess(audio_path, self.sample_rate,
                                    self.window_size, self.window_stride)

    if self.cpu:
        from PuzzleLib.CPU.CPUArray import CPUArray
        inputs = CPUArray.toDevice(np.array([preprocessed_audio]).astype(np.float32))
    else:
        from PuzzleLib.Backend import gpuarray
        # Half precision on the GPU path; the CPU path stays in float32.
        inputs = gpuarray.to_gpu(np.array([preprocessed_audio]).astype(np.float16))

    output = self.w2l(inputs).get()
    output = np.vstack(output).astype(np.float32)

    result = self.decoder.decode(output)

    # Release pooled GPU memory held between calls.
    if not self.cpu:
        from PuzzleLib.Backend.gpuarray import memoryPool
        memoryPool.freeHeld()

    del inputs, output
    return result
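# The decoder above is project-specific. For illustration only: a minimal
# greedy CTC-style collapse over a (time, num_classes) probability matrix,
# with a hypothetical alphabet and blank index (this is not PuzzleLib's API).
import numpy as np

def greedy_ctc_decode(log_probs, alphabet, blank=0):
    # Pick the best class per frame, merge repeats, then drop blanks.
    best = np.argmax(log_probs, axis=1)
    merged = [c for i, c in enumerate(best) if i == 0 or c != best[i - 1]]
    return ''.join(alphabet[c] for c in merged if c != blank)

frames = np.log(np.array([
    [0.1, 0.8, 0.1],    # 'a'
    [0.1, 0.8, 0.1],    # 'a' (repeat, merged away)
    [0.9, 0.05, 0.05],  # blank
    [0.1, 0.1, 0.8],    # 'b'
]))
print(greedy_ctc_decode(frames, alphabet='_ab'))  # -> "ab"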
import pandas as pd
import pprint

from data_loader import load_pickle_data, preprocess
from utils import listing_daterange, find_string_idx, hlight_term, analyze_docs
from lda.lda_trainer import LDATrainer
from lda.lda_corpus import *
from lda.lda_topic_modeling import TopicModeler
from lda.pipeline import Pipeline

if __name__ == '__main__':
    preprocess('sample_100_news.tsv')
    input_raw_data = load_pickle_data('./data/output/sample_output.pkl')

    # prepare dictionary, corpus
    tokens_list = input_raw_data['tokens']
    bigram = create_bigram_model(tokens_list, min_count=5, threshold=50)
    tokens_list = convert_to_bigram_tokens(tokens_list, bigram)

    # modeling
    trainer = LDATrainer(tokens_list=tokens_list, tf_idf=False)
    # trainer.train(trainer.corpus, trainer.dictionary, num_topics=6, passes=5,
    #               workers=4, iterations=10, chunksize=50, save=True)
    # trainer.fit_optimal_topic_model(trainer.corpus, trainer.dictionary, tokens_list,
    #                                 passes=30, workers=4, iterations=20, chunksize=400,
    #                                 limit=10, start=2, step=2)

    topic_model = TopicModeler()
    print(topic_model.sorted_topic_keyword(topic_model.topic_info, topic=1, _lambda=0.5))
    # topic_model.show_topic_modeling()
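# create_bigram_model / convert_to_bigram_tokens come from lda.lda_corpus; a
# plausible gensim-based equivalent (an assumption about the implementation,
# matching the min_count/threshold parameters used above) might look like:
from gensim.models.phrases import Phrases, Phraser

def create_bigram_model(tokens_list, min_count=5, threshold=50):
    # Learn which adjacent token pairs co-occur often enough to merge.
    return Phraser(Phrases(tokens_list, min_count=min_count, threshold=threshold))

def convert_to_bigram_tokens(tokens_list, bigram):
    # Rewrite each document, joining detected pairs into single "a_b" tokens.
    return [bigram[tokens] for tokens in tokens_list]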
def main():
    opt = TrainOptions().parse()
    print('point0, option parser finished')

    '''
    1. load_data, only select feature images, default: load the gradient of DEM
       return: min, max among all the input images
    '''
    frame_data, mask_data, minn, maxx = load_feature_data(
        opt.frame_path, opt.mask_path, gradient=False, dim=opt.input_shape)
    print('point1, finished load data')
    print('point2, shape frame mask', frame_data.shape, mask_data.shape)

    '''2. split train_val_test: input_train/val/test label_train/val/test '''
    input_train, input_test, label_train, label_test = train_test_split(
        frame_data, mask_data, test_size=0.15, shuffle=False)
    # Carve the validation set out of the training portion; splitting
    # frame_data again here would leak test samples into train/val.
    input_train, input_val, label_train, label_val = train_test_split(
        input_train, label_train, test_size=0.1, shuffle=False)
    print('point3, shape frame mask', input_train.shape, label_train.shape)

    n_train, n_test, n_val = len(input_train), len(input_test), len(input_val)
    print('***** #train: #test: #val = %d : %d : %d ******' % (n_train, n_test, n_val))

    Data_dict = {
        'train': [input_train.astype('float32'), label_train.astype('float32')],
        'val': [input_val.astype('float32'), label_val.astype('float32')],
        'test': [input_test.astype('float32'), label_test.astype('float32')]
    }

    '''3. preprocess_data ----- normalize all the data '''
    preprocess(Data_dict, minn, maxx, opt.input_shape)

    mkdir(opt.result_path)
    if opt.isTrain:
        # the actual model
        mkdir(opt.model_path)
        define_model(Data_dict, opt)
    else:
        # test / prediction
        print('===========test==========')
        test_model(Data_dict, opt)

        # visualize result
        img = Data_dict['test'][0][:, :, :, 0]
        real = np.load(opt.result_path + '/gt_labels.npy')
        pred = np.load(opt.result_path + '/pred_labels.npy')
        # Binarize the predicted probabilities at 0.5.
        predicted_data = np.zeros(pred.shape)
        for i in range(pred.shape[0]):
            for j in range(pred.shape[1]):
                for k in range(pred.shape[2]):
                    predicted_data[i, j, k] = 1 if pred[i, j, k] >= 0.5 else 0
        for i in range(100):
            visualize(opt.result_path, img, real, pred, predicted_data, i)
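# The triple loop above binarizes pred element-wise; with numpy the same
# thresholding can be done in one vectorized expression:
import numpy as np

pred = np.random.rand(4, 8, 8)  # stand-in for the loaded predictions
predicted_data = (pred >= 0.5).astype(np.float32)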