def _reading_data():
    """Step 2: load the training data and persist it to the working directory.

    Loading covers reading the raw file, preprocessing, and any special
    data-cleaning performed by the loader itself.
    """
    print(config.USER)
    train_filepath = os.path.join(os.getcwd(), "data", config.FILENAME)
    loader = DataLoader(train_filepath)
    loaded = loader.load_data(useSpark=False, interactive=False)
    loaded.save_data(os.getcwd())
}, os.path.join(args.exp_dir , 'unfinished_model.pt')) epoch += 1 cost_time = time.time() - since print ('Training complete in {:.0f}m {:.0f}s'.format(cost_time//60,cost_time%60)) print ('Best Train Acc is {:.4f}'.format(best_train_acc)) print ('Best Val Acc is {:.4f}'.format(best_acc)) model.load_state_dict(best_model) return model,cost_time,best_acc,best_train_acc if __name__ == '__main__': print ('DataSets: '+args.dataset) print ('ResNet Depth: '+str(args.depth)) loader = DataLoader(args.dataset,batch_size=args.batch_size) dataloaders,dataset_sizes = loader.load_data() num_classes = 10 if args.dataset == 'cifar-10': num_classes = 10 if args.dataset == 'cifar-100': num_classes = 100 model = resnet_cifar(depth=args.depth, num_classes=num_classes) optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, nesterov=True, weight_decay=1e-4) # define loss and optimizer criterion = nn.CrossEntropyLoss() scheduler = MultiStepLR(optimizer, milestones=[args.epoch*0.4, args.epoch*0.6, args.epoch*0.8], gamma=0.1) use_gpu = torch.cuda.is_available()
epoch += 1 cost_time = time.time() - since print('Training complete in {:.0f}h{:.0f}m{:.0f}s'.format( (cost_time // 60) // 60, (cost_time // 60) % 60, cost_time % 60)) return model, cost_time, best_acc, best_train_acc if __name__ == '__main__': loader = DataLoader(args.dataset, batch_size=args.batch_size, seed=args.seed) dataloaders, dataset_sizes = loader.load_data(args.img_size) num_classes = 10 if args.dataset == 'cifar-10': num_classes = 10 if args.dataset == 'cifar-100': num_classes = 100 if args.dataset == 'VOCpart': num_classes = len(dataloaders['train'].dataset.classes) assert args.img_size == 128, 'only supports --img_size 128' model = resnet_std(depth=args.depth, num_classes=num_classes, ifmask=args.ifmask, pretrained=True)
def Run_SRNN_NormalCase(args, no_dataset):
    """Train and evaluate an SRNN model for dataset id ``no_dataset``.

    Each epoch trains over all training batches, checkpoints the model to
    disk, then scores the evaluation batches with an RMSE loss.  The
    per-epoch evaluation losses (plus the best epoch overall in the last
    row) are written as text to the dataset's log path.

    Args:
        args: parsed configuration namespace; fields used here include
            num_epochs and grad_clip (DataLoader/ST_GRAPH/SRNN consume more).
        no_dataset: dataset identifier used to resolve data/graph/log paths.

    Returns:
        None.  Side effects: one checkpoint file per epoch (Save_path) and
        the evaluation-loss table saved via np.savetxt (Log_path).
    """
    data_path, graph_path = Data_path(no_dataset)
    log_path = Log_path(no_dataset)
    # Construct the DataLoader object that loads data
    dataloader = DataLoader(args)
    dataloader.load_data(data_path)
    # Construct the ST-graph object that reads graph
    stgraph = ST_GRAPH(args)
    stgraph.readGraph(dataloader.num_sensor, graph_path)
    # Initialize net
    net = SRNN(args)
    net.setStgraph(stgraph)
    print('- Number of trainable parameters:',
          sum(p.numel() for p in net.parameters() if p.requires_grad))
    # Earlier optimizer experiments kept for reference:
    # optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    # optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate, momentum=0.0001, centered=True)
    optimizer = torch.optim.Adagrad(net.parameters())
    best_eval_loss = 10000  # sentinel "infinity"; first epoch always improves on it
    best_epoch = 0
    print('')
    print('---- Train and Evaluation ----')
    # One (epoch, eval_loss) row per epoch; the extra final row records the best.
    eval_loss_res = np.zeros((args.num_epochs + 1, 2))
    for e in range(args.num_epochs):
        epoch = e + 1
        #### Training ####
        print('-- Training, epoch {}/{}'.format(epoch, args.num_epochs))
        loss_epoch = 0
        # For each batch
        for b in range(dataloader.num_batches_train):
            batch = b + 1
            start = time.time()
            # Get batch data
            x = dataloader.next_batch_train()
            # Loss for this batch
            loss_batch = 0
            # For each sequence in the batch
            for sequence in range(dataloader.batch_size):
                # put node and edge features
                stgraph.putSequenceData(x[sequence])
                # get data to feed
                data_nodes, data_temporalEdges, data_spatialEdges = stgraph.getSequenceData()
                # put a sequence to net
                loss_output, data_nodes, outputs = forward(
                    net, optimizer, args, stgraph, data_nodes,
                    data_temporalEdges, data_spatialEdges)
                # NOTE(review): backward() runs once per sequence while
                # optimizer.step() runs once per batch, so gradients accumulate
                # across the batch — presumably zero_grad happens inside
                # forward(); confirm against its definition.
                loss_output.backward()
                # Reported loss is RMSE on the final timestep, de-scaled.
                loss_batch += loss_RMSE(data_nodes[-1], outputs[-1],
                                        dataloader.scaler)
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)
            # Update parameters
            optimizer.step()
            end = time.time()
            loss_batch = loss_batch / dataloader.batch_size
            loss_epoch += loss_batch
            print('Train: {}/{}, train_loss = {:.3f}, time/batch = {:.3f}'.
                  format(e * dataloader.num_batches_train + batch,
                         args.num_epochs * dataloader.num_batches_train,
                         loss_batch, end - start))
        # Compute loss for the entire epoch
        loss_epoch /= dataloader.num_batches_train
        print('(epoch {}), train_loss = {:.3f}'.format(epoch, loss_epoch))
        # Save the model after each epoch
        save_path = Save_path(no_dataset, epoch)
        print('Saving model to ' + save_path)
        torch.save(
            {
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, save_path)
        #### Evaluation ####
        print('-- Evaluation, epoch {}/{}'.format(epoch, args.num_epochs))
        loss_epoch = 0
        for b in range(dataloader.num_batches_eval):
            batch = b + 1
            start = time.time()
            # Get batch data
            x = dataloader.next_batch_eval()
            # Loss for this batch
            loss_batch = 0
            for sequence in range(dataloader.batch_size):
                # put node and edge features
                stgraph.putSequenceData(x[sequence])
                # get data to feed
                data_nodes, data_temporalEdges, data_spatialEdges = stgraph.getSequenceData()
                # put a sequence to net (loss output discarded; no backward here)
                _, data_nodes, outputs = forward(net, optimizer, args, stgraph,
                                                 data_nodes,
                                                 data_temporalEdges,
                                                 data_spatialEdges)
                loss_batch += loss_RMSE(data_nodes[-1], outputs[-1],
                                        dataloader.scaler)
            end = time.time()
            loss_batch = loss_batch / dataloader.batch_size
            loss_epoch += loss_batch
            print(
                'Eval: {}/{}, eval_loss = {:.3f}, time/batch = {:.3f}'.format(
                    e * dataloader.num_batches_eval + batch,
                    args.num_epochs * dataloader.num_batches_eval, loss_batch,
                    end - start))
        loss_epoch /= dataloader.num_batches_eval
        eval_loss_res[e] = (epoch, loss_epoch)
        # Update best validation loss until now
        if loss_epoch < best_eval_loss:
            best_eval_loss = loss_epoch
            best_epoch = epoch
        print('(epoch {}), eval_loss = {:.3f}'.format(epoch, loss_epoch))
    # Record the best epoch and best validation loss overall
    print('Best epoch: {}, Best evaluation loss {:.3f}'.format(
        best_epoch, best_eval_loss))
    eval_loss_res[-1] = (best_epoch, best_eval_loss)
    np.savetxt(log_path, eval_loss_res, fmt='%d, %.3f')
    print('- Eval result has been saved in ', log_path)
    print('')
if __name__ == "__main__":
    # Require exactly two positional arguments after the script name.
    if len(sys.argv) != 3:
        print(
            'USAGE: python inspection.py TRAIN_INPUT_FILE INSPECTION_OUT_FILE')
        sys.exit(1)

    train_input_file, inspection_out_file = sys.argv[1], sys.argv[2]

    # Validate the expected file extensions before doing any work.
    if not train_input_file.endswith('.tsv'):
        print('Error: TRAIN_INPUT_FILE must be .tsv files')
        sys.exit(1)
    if not inspection_out_file.endswith('.txt'):
        print('Error: INSPECTION_OUT_FILE must be .txt file')
        sys.exit(1)

    # Load the training data and run the inspection over it.
    loader = DataLoader()
    loader.load_data(train_input_file)
    entropy, error_rate, _ = Inspection(loader).evaluate()

    # Write the two metrics out, one per line.
    with open(inspection_out_file, mode='w+') as out:
        out.write('entropy: ' + str(entropy) + '\n')
        out.write('error: ' + str(error_rate))
# for num in random_list[:int(len(random_list) / 2)]: # bloom_two.add(chr(num)) # # estimate_num_of_elem_A = bloom_one.estimate_num_of_elem() # estimate_num_of_elem_B = bloom_two.estimate_num_of_elem() # print("estimate_num_of_elem_A: " + str(estimate_num_of_elem_A)) # print("estimate_num_of_elem_B: " + str(estimate_num_of_elem_B)) # # estimate_size_of_union = bloom_one.estimate_size_of_union(bloom_two) # print("estimate_size_of_union: " + str(estimate_size_of_union)) # # estimate_size_of_intersection = bloom_one.estimate_size_of_intersection(bloom_two) # print("estimate_size_of_intersection: " + str(estimate_size_of_intersection)) loader = DataLoader('columns.txt') cols = loader.load_data() block_cnt = 20 block_len = 30 n = block_cnt * block_len # code space. set it to the max size of a col for now p = 0.01 # false positive probability # build bloom filter for all cols bloom_filter_list = [] for col in cols: bloom_filter = BloomFilter(n, p) for num in col: bloom_filter.add(chr(num)) bloom_filter_list.append(bloom_filter) # write each bloom filter to file