def test_fit_func_ranking(self):
    # training dataset
    train_dtset = Data()
    # testing dataset
    test_dtset = Data()
    train_dtset.attr_file = self.filename1
    test_dtset.attr_file = self.filename1
    # attributes data
    train_dtset.read_attr_data()
    test_dtset.read_attr_data()
    # populate the training set with an 80/20 train/test split:
    # sample random indices from 0 to (number of examples - 1)
    train_index_list = random.sample(range(len(self.dataset.examples)),
                                     int(len(self.dataset.examples) * 0.8))
    train_dtset.examples = [
        self.dataset.examples[index] for index in train_index_list
        if (self.dataset.examples[index] not in train_dtset.examples)
    ]
    test_dtset.examples = [
        ex for ex in self.dataset.examples
        if (ex not in train_dtset.examples)
    ]
    root = compute_tree(train_dtset, None, None)
    print(test_examples(root, test_dtset))
def main(args: argparse.Namespace):
    train_arr, test_arr = Data.read_pickle(args.pickle, 'train'), Data.read_pickle(
        args.pickle, 'test')
    train = DataLoader(Data(train_arr, args.dev),
                       batch_size=args.bsize,
                       num_workers=args.workers,
                       shuffle=True,
                       collate_fn=Data.collate_fn)
    test = DataLoader(Data(test_arr, args.dev, train.dataset.mean, train.dataset.std),
                      batch_size=args.bsize,
                      num_workers=args.workers,
                      shuffle=True,
                      collate_fn=Data.collate_fn)
    model = PixelCnn(train.dataset.W, train.dataset.C, args.kernel_size,
                     args.layers, args.filters, args.dist_size,
                     getattr(models, args.conv_class))
    if os.path.exists(args.save_path):
        model.load(args.save_path, args.dev)
    elif args.dev == 'cuda':
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    nlls_train, nlls_test = training(train, test, model, optimizer,
                                     args.epochs, args.save_path)
    samples = model.generate_samples(args.n_samples, args.dev,
                                     train.dataset.mean, train.dataset.std)
    save_training_plot(nlls_train, nlls_test, 'NLL (nats/dim)', args.nll_img_path)
    show_samples(samples.cpu().numpy(), args.samples_img_path)
def __init__(self, args):
    self.args = args
    data = Data(args.train_path, args.val_path, args.glove_path)
    data.build_vocab()
    train_data, val_data = data.input2tensor()
    embedding_matrix = data.build_embedding_matrix(args.embed_type, args.embed_dim)
    train_dataset = MyDataset(train_data, data.max_len)
    val_dataset = MyDataset(val_data, data.max_len)
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True)
    self.val_dataloader = DataLoader(val_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=False)
    if args.model_type == 'CNN':
        self.model = CNNModel(args, data.vocab_size, embedding_matrix).to(args.device)
    else:
        self.model = LSTMNet(args, data.vocab_size, embedding_matrix).to(args.device)
    self.loss_func = nn.CrossEntropyLoss()
    self.optim = torch.optim.Adam(self.model.parameters(), lr=args.learning_rate)
    if torch.cuda.is_available():
        print('cuda memory allocated:',
              torch.cuda.memory_allocated(device=args.device.index))
def main(args):
    train_dataset = Data(args.train_manifest)
    train_loader = data_utils.DataLoader(train_dataset, args.batch_size)
    eval_loader = None
    if args.eval_manifest:
        eval_dataset = Data(args.eval_manifest)
        eval_loader = data_utils.DataLoader(eval_dataset, args.batch_size)
    net = models_dict[args.model](args)
    if args.train:
        net.train(train_loader, eval_loader)
    elif args.eval:
        net.eval()
    else:
        net.predict()
def test0():
    # Generate dataset
    np.random.seed(125)
    random.seed(239)
    n = 100
    model_desc = semopy.model_generation.generate_desc(n_lat=0, n_endo=1,
                                                       n_exo=2, n_inds=3,
                                                       n_cycles=0)
    params, aux = semopy.model_generation.generate_parameters(model_desc)
    data_gen = semopy.model_generation.generate_data(aux, n)
    data_gen.index = [f's{i}' for i in range(n)]
    # generate random effects
    group1 = pd.DataFrame(data={'group1': np.random.binomial(1, 0.5, size=n)})
    group1.index = [f's{i}' for i in range(n)]
    model_desc = """
    x1 ~ g1 + g2 + group1
    """
    # model_desc = """
    # x1 ~ g1 + g2
    # """
    data_gen['x1'] = data_gen['x1'] + 10 * group1['group1']
    data_gen = concat([data_gen, group1], axis=1)
    data = Data(d_phens=data_gen, show_warning=False)
    model = mtmlModel(model_desc=model_desc, data=data)
    model.opt_bayes()
    print(model.unnormalize_params())
    # -----------------------------------
    # semopy
    sem_old = Model(model_desc)
    sem_old.fit(data_gen)
    print('semopy')
    insp = sem_old.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    # semba
    semba_model = semba.Model(model_desc)
    semba_model.fit(data_gen, num_samples=1000)
    print('semba')
    insp = semba_model.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    print('params')
    print(params)
    return model_desc, data, model
def val_dataloader(self):
    # copy so the override does not mutate the shared class-level dict
    d_params = dict(Data.parameters)
    d_params.update(self.args.dparams_override)
    test_dataset = Data(json_path=self.args.valid_file, **d_params, valid=True)
    return DataLoader(dataset=test_dataset,
                      batch_size=self.args.batch_size,
                      num_workers=self.args.data_workers,
                      collate_fn=collate_fn_padd,
                      pin_memory=True)
def train_dataloader(self):
    # copy so the override does not mutate the shared class-level dict
    d_params = dict(Data.parameters)
    d_params.update(self.args.dparams_override)
    train_dataset = Data(json_path=self.args.train_file, **d_params)
    return DataLoader(dataset=train_dataset,
                      batch_size=self.args.batch_size,
                      num_workers=self.args.data_workers,
                      pin_memory=True,
                      collate_fn=collate_fn_padd)
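# Why both dataloaders above copy Data.parameters before calling update():
# writing "d_params = Data.parameters" would alias the class-level dict, so
# dparams_override would silently leak into every dataloader built later.
# A minimal standalone sketch of the pitfall (the _Cfg class and the
# sample_rate key are illustrative, not from this project):
class _Cfg:
    parameters = {'sample_rate': 8000}

alias = _Cfg.parameters                    # alias, not a copy
alias.update({'sample_rate': 16000})
assert _Cfg.parameters['sample_rate'] == 16000   # class attribute mutated

fresh = dict(_Cfg.parameters)              # shallow copy leaves the class intact
fresh.update({'sample_rate': 22050})
assert _Cfg.parameters['sample_rate'] == 16000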
def main(args):
    # ===============================================
    # Build Data Loader
    # ===============================================
    normalize = transformer.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    train_transformer = transformer.Compose([
        transformer.RandomHorizontalFlip(),
        transformer.RandomScaleCrop(),
        transformer.ArrayToTensor(),
        normalize
    ])
    train_dataset = Data('train',
                         transformer=train_transformer,
                         seed=args.seed,
                         train=True,
                         seq_length=3)
    valid_transformer = transformer.Compose([transformer.ArrayToTensor(), normalize])
    val_dataset = Data('val',
                       transformer=valid_transformer,
                       seed=args.seed,
                       train=True,
                       seq_length=3)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=cfg.workers,
                              pin_memory=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=cfg.workers,
                            pin_memory=True)
    # ===============================================
    # Build Model
    # ===============================================
    model = Net()
    model = model.cuda()
    if args.load:
        pass
    else:
        model.init_weights()
    model = DataParallel(model)
def setUp(self):
    self.filename1 = 'car.c45-names.txt'  # attributes
    self.filename2 = 'car.data'  # data examples
    self.dataset = Data()
    self.dataset.attr_file = self.filename1
    self.dataset.data_file = self.filename2
    self.dataset.read_attr_data()
    self.dataset.read_examples_data()
def data_import(feature_data, label_data):
    with open(feature_data, "r") as fi:
        feature_list = fi.readlines()
    with open(label_data, "r") as fi:
        label_list = fi.readlines()
    data_set = []
    for i in range(len(feature_list)):
        # each feature line is split into characters; the first character
        # of the matching label line is the target
        data = Data(list(feature_list[i]), list(label_list[i])[0:1])
        data_set.append(data)
    return Dataset(data_set)
def data_import(feature_data, label_data, bit):
    with open(feature_data, "r") as fi:
        feature_list = fi.readlines()
    with open(label_data, "r") as fi:
        label_list = fi.readlines()
    data_set = []
    for i in range(len(feature_list)):
        # [list(label_list[i])[bit]] is the single-element list holding
        # character `bit` of the label line (bit 0 in the variant above)
        data = Data(list(feature_list[i]), [list(label_list[i])[bit]])
        data_set.append(data)
    return Dataset(data_set)
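# Hypothetical usage of data_import, driven by tiny throwaway files; the
# paths and contents are illustrative, not from the original project. Note
# that list(feature_list[i]) splits a line into characters (including the
# trailing newline), and bit=0 reproduces the first variant's behavior.
import os
import tempfile

tmp_dir = tempfile.mkdtemp()
feat_path = os.path.join(tmp_dir, "features.txt")
lab_path = os.path.join(tmp_dir, "labels.txt")
with open(feat_path, "w") as f:
    f.write("0101\n1100\n")
with open(lab_path, "w") as f:
    f.write("10\n01\n")

bit0_set = data_import(feat_path, lab_path, 0)  # labels: '1', '0'
bit1_set = data_import(feat_path, lab_path, 1)  # labels: '0', '1'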
def process_user_inputs(classifier):
    while True:
        print("\nGive your inputs to validate or press ENTER to continue")
        values = classifier.get_data_size()
        data = []
        for i in range(values):
            text = 'Enter value %s: ' % (i + 1)
            _data = input(text)
            if not _data:
                return
            data.append(float(_data))
        predicted_value = classifier.predict(Data(data, None))
        print('Predicted value: %s' % predicted_value)
def test1():
    # Generate dataset
    np.random.seed(125)
    random.seed(239)
    n = 100
    model_desc = semopy.model_generation.generate_desc(n_lat=1, n_endo=0,
                                                       n_exo=0, n_inds=3,
                                                       n_cycles=0)
    # show(model_desc)
    params, aux = semopy.model_generation.generate_parameters(model_desc)
    data_gen = semopy.model_generation.generate_data(aux, n)
    data_gen.index = [f's{i}' for i in range(n)]
    model_desc = """
    eta1 =~ y1 + y2 + y3
    """
    data = Data(d_phens=data_gen)
    model = mtmlModel(model_desc=model_desc, data=data)
    model.show_mod()
    model.opt_bayes()
    print(model.unnormalize_params())
    # -----------------------------------
    # semopy
    sem_old = Model(model_desc)
    sem_old.fit(data_gen)
    print('semopy')
    insp = sem_old.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    # semba
    semba_model = semba.Model(model_desc)
    semba_model.fit(data_gen, num_samples=1000)
    print('semba')
    insp = semba_model.inspect()
    insp = insp.loc[insp['op'] == '~', ['lval', 'rval', 'Estimate']]
    print(insp)
    print('params')
    print(params)
    return model_desc, data, model
def __init__(self, path: str):
    """
    Args:
        path: str, path to the data file
    """
    self.train = list()
    self.test = list()
    self._path = path
    self._base = os.path.abspath(".")
    self.split_data(0.5)
    start = time.perf_counter()
    self.data = Data(self.train, self.test)
    end = time.perf_counter()
    print("Reading data and building objects took {0:10}".format(end - start))
def calc_gain(dataset, entropy, val, attr_index):
    attr_entropy = 0
    total_examples = len(dataset.examples)
    # count for dataset class
    class_count = count_class_values(dataset.examples)
    # collect the subset of examples where the attribute takes value `val`
    new_dataset = Data()
    for example in dataset.examples:
        if example[attr_index] == val:
            new_dataset.examples.append(example)
    # weighted entropy of this subset; the passed-in `entropy` is unused here,
    # so the caller is expected to subtract these contributions from the
    # parent entropy to obtain the information gain
    attr_entropy = len(new_dataset.examples) / total_examples * calc_entropy(new_dataset)
    return attr_entropy
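# For context, a minimal sketch of the entropy helper calc_gain relies on;
# this assumes each example stores its class label in the last position,
# which may differ from the real calc_entropy in this codebase.
import math
from collections import Counter

def calc_entropy_sketch(dataset):
    total = len(dataset.examples)
    if total == 0:
        return 0.0
    class_counts = Counter(example[-1] for example in dataset.examples)
    # Shannon entropy: -sum(p * log2(p)) over the class distribution
    return -sum((c / total) * math.log2(c / total)
                for c in class_counts.values())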
def main():
    config = get_args()
    dataset = Data(config)
    num_features = dataset.features.shape[1]
    num_classes = dataset.labels.max().item() + 1
    model = GCN(config=config, num_features=num_features, num_classes=num_classes)
    solver = Solver(config, model, dataset)
    if torch.cuda.is_available():
        model = model.to('cuda')
    criterion, best_model = solver.train()
    solver.test(criterion, best_model)
def load_data(train_path, val_path, glove_path):
    data = Data(train_path, val_path, glove_path)
    train_x_list, _, val_x_list, _ = data.split_sentence()
    data.build_vocab()
    orig_data = train_x_list + val_x_list
    train_data = get_train_data(data.vocab, orig_data)
    print("Number of data instances: {}".format(len(train_data)))
    vocab_size = len(data.vocab) + 1
    print("Vocabulary size:", vocab_size)
    # negative-sampling distribution: unigram frequencies raised to the
    # 3/4 power, then renormalized
    dist = np.array([v for k, v in data.word_freq.items()])
    dist = np.power(dist, 0.75)
    dist = dist / dist.sum()
    return train_data, data.vocab, vocab_size, dist
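# The 3/4-power reweighting in load_data is the standard word2vec
# negative-sampling trick: it flattens the unigram distribution so rare
# words are drawn more often. A tiny standalone illustration with made-up
# frequencies:
import numpy as np

freq = np.array([100.0, 10.0, 1.0])   # raw word frequencies
dist = np.power(freq, 0.75)
dist = dist / dist.sum()
print(dist)   # the rarest word's share grows relative to raw frequencies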
def main():
    log = get_logger(LOG_DIR)
    transform_val = Compose([
        Scale((224, 224)),
        ToTensor(),
        Normalize(mean=[0.45, 0.45, 0.45], std=[0.225, 0.225, 0.225])
    ])
    valset = Data(DATA_DIR, training=False, transform=transform_val)
    val_loader = DataLoader(valset, batch_size=1, num_workers=0)
    log.info('Data loaded.')
    log.info('Val samples:{}'.format(len(val_loader)))
    # set device
    device = torch.device('cpu')
    # torch.manual_seed(SEED)
    log.info('Torch Device:{}'.format(device))
    # set model and optimizer
    net = FCNResNet()
    net.to(device)
    # net.init_weights()  # pretrained resnet weights
    net.load_state_dict(torch.load('./checkpoints/06.22.23.14.41_ep36_val.pt'),
                        strict=False)
    log.info('Model loaded.')
    for i, data in tqdm(enumerate(val_loader)):
        img, gt = data
        img = img.to(device)
        gt = gt.to(device)
        pred = net(img)
        iou = iou_loss(pred, gt)
        pred[pred < 0.5] = 0
        print(iou.detach_().item())
        pred = pred.detach_().numpy().squeeze(0).squeeze(0)
        pred = Image.fromarray((pred * 255).astype(np.uint8))
        pred.save(f'{OUTPUT_DIR}/{i}.png')
def main():
    args = sys.argv
    if len(args) < 3:
        print("You should provide a filename to data.")
        filename1 = 'car.c45-names.txt'  # attributes
        filename2 = 'car.data'  # data examples
    else:
        filename1 = str(sys.argv[1])  # attributes
        filename2 = str(sys.argv[2])  # data examples
    dataset = Data()
    dataset.attr_file = filename1
    dataset.data_file = filename2
    dataset.read_attr_data()
    dataset.read_examples_data()
    # Proportion training set to testing set (1 means only training set)
    PROPORTION = 1
    train_dtset = copy.deepcopy(dataset)
    test_dtset = copy.deepcopy(dataset)
    train_dtset.examples, test_dtset.examples = [], []
    total = len(dataset.examples)
    # populating train dataset
    train_index_list = random.sample(range(total), int(total * PROPORTION))
    train_dtset.examples = [
        dataset.examples[index] for index in train_index_list
        if (dataset.examples[index] not in train_dtset.examples)
    ]
    # populating test dataset
    test_dtset.examples = [
        ex for ex in dataset.examples if (ex not in train_dtset.examples)
    ]
    print("Computing tree...")
    root = compute_tree(train_dtset, None, None)
    tree_filename = 'results/tree.txt'
    with open(tree_filename, "w") as tree_file:
        write_tree(root, 0, tree_file)
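# A simpler, equivalent split (a sketch, not the project's code): shuffling
# indices once and slicing avoids the O(n^2) "not in" membership tests used
# in main() above, which matters once the example list grows.
import random

def split_examples(examples, proportion, rng=random):
    indices = list(range(len(examples)))
    rng.shuffle(indices)
    cut = int(len(examples) * proportion)
    train = [examples[i] for i in indices[:cut]]
    test = [examples[i] for i in indices[cut:]]
    return train, test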
def get_dataset(model_block, model, year=2018):
    global THRESHOLD
    kanapki = []
    d = Data(year)
    a = d.load_dataset()
    b = a[:, 0]
    # per-channel normalization statistics over the whole dataset
    model.mi = np.nanmean(b, axis=(0, 1, 2)).reshape(1, 1, 7)
    model.sigma = np.nanstd(b, axis=(0, 1, 2)).reshape(1, 1, 7)
    # print(b.shape, np.nanmean(a[:, 1, :, :, 0]))
    # for x, y in a:
    #     if np.nanmean(y[:, :, 0]) > 0.8:
    #         print(np.nanmean(x[:, :, 0]), np.nanmean(y[:, :, 0]))
    for x, y in tqdm(a):
        x_map = model_block.get_input(x)
        kanapki.append(
            Kanapka(
                model_block=model_block,
                features=x_map,
                label=model_block.get_output(y),
            ))
        # FIXME: add [[augment]]
        # try:
        #     for _ in range(4):
        #         x_copy = x_map
        #         x_map = np.swapaxes(x_map, 0, 1)
        #         if np.array_equal(x_copy, x_map):
        #             break
        #         kanapki.append(
        #             Kanapka(
        #                 model_block=model_block,
        #                 features=x_map,
        #                 label=model_block.get_output(y),
        #             ))
        # except:
        #     pass
    return Dataset(model_block=model_block,
                   kanapki=kanapki,
                   threshold=THRESHOLD,
                   model=model)
def load_data(path: str):
    # Each question record spans several lines: the question JSON, its
    # parse tree, its dependency triples, its NER tags, an answer count,
    # then one tab-separated line per answer (answer JSON, optional label).
    data = Data()
    with open(path, 'r', encoding='utf-8') as f:
        while True:
            q = f.readline().strip('\n')
            if len(q) <= 0:
                break
            question = Question(json.loads(q))
            question.parse = ParserTree(f.readline().strip('\n'))
            question.dep = list(
                DependencyTriple(i[0], i[1], i[2])
                for i in json.loads(f.readline().strip('\n')))
            question.ner = json.loads(f.readline().strip('\n'))
            ans_num = int(f.readline().strip('\n'))
            for i in range(ans_num):
                item = f.readline().strip('\n').split('\t')
                if len(item) > 1:
                    question.add_answer(json.loads(item[0]), convert_bool(item[1]))
                else:
                    question.add_answer(json.loads(item[0]))
            data.questions.append(question)
    return data
def main(args):
    device = torch.device(args.device)
    # load the model with the requested name from the models folder
    model = torch.load(f"{args.model_folder}/{args.model_name}", map_location=device)
    model.eval()
    data_transforms = transforms.Compose([ToTensor()])
    dataset = Data(args.filename_x, args.filename_y, args.data_root,
                   transform=data_transforms)
    output = {"Super_resolution": []}
    for sample in dataset:
        lores = sample['x'].to(device).float()
        print(lores.shape)
        sures = model(lores.unsqueeze(0)).squeeze(0)
        output["Super_resolution"].append(sures.detach().cpu().data.numpy())
    savemat(f"{args.model_folder}/{args.filename_out}", output)
def main(input_filepath, output_filepath, interim_filepath, config_filepath,
         tokenization):
    """
    Turns raw data located in .../raw into processed data located in .../processed

    Keyword arguments:
    input_filepath -- filepath of raw/input data
    output_filepath -- filepath to put processed data
    interim_filepath -- filepath of the folder to save interim/intermediary files
    config_filepath -- filepath of the config file used to set execution options
    tokenization -- columns to apply the tokenization operation to
    """
    logger = logging.getLogger(__name__)
    if interim_filepath is not None:
        logger.info("Making final data set from interim data: {0}".format(
            str(interim_filepath)))
    else:
        logger.info("Making final data set from raw data: {0}".format(
            str(input_filepath)))
    logger.info("Processed file will be saved in: {0}".format(
        str(output_filepath)))
    with open(config_filepath, "r") as stream:
        try:
            config_data = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
    # Manage arguments passed by the user with the YAML config file
    manage_arguments(config_data, tokenization, input_filepath,
                     output_filepath, interim_filepath)
    # Load data
    data = Data(tokenization, input_filepath, output_filepath, interim_filepath)
    # Apply operations in file
    data.apply_operations()
def jsma_craft(surrogate_model, source_samples, target_info):
    import tensorflow as tf
    import numpy as np
    from dataset import Data
    from cleverhans.attacks import SaliencyMapMethod
    with tf.variable_scope(surrogate_model.scope):
        sess = tf.get_default_session()
        jsma = SaliencyMapMethod(surrogate_model.tf(), back='tf', sess=sess)
        one_hot_target = np.zeros((1, 10), dtype=np.float32)
        one_hot_target[0, target_info['target_class']] = 1
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': one_hot_target
        }
        # Loop over the samples we want to perturb into adversarial examples
        adv_xs = []
        for sample_ind in range(len(source_samples)):
            sample = source_samples.getx(sample_ind)
            adv_xs.append(jsma.generate_np(sample, **jsma_params))
            # TODO remove break
            if sample_ind == 2:
                break
        adv_ys = np.concatenate([one_hot_target] * len(adv_xs))
        adv_xs_d = Data(np.concatenate(adv_xs), adv_ys)
        return source_samples, adv_xs_d
def main():
    p = Path(__file__).parents[0]
    directory = os.path.abspath(os.path.join(p, "gemini_ETHUSD_d.csv"))
    data = Data(directory)
    train, test = data.split_data(test_size=0.2)
    numOfDays = 20
    x_train, x_test, y_train, y_test = data.prepare_data(train, test, numOfDays)
    model = Model()
    # hyperparameter tuning
    epochs = 50
    optimizer = 'adam'
    loss = 'mean_squared_error'
    activation = 'tanh'
    batch_size = 1
    neurons = 30
    model.LSTM_model(x_train, activation=activation, optimizer=optimizer,
                     loss=loss, neurons=neurons)
    history = model.train(x_train, y_train, x_test, y_test,
                          epochs=epochs, batch_size=batch_size)
    targets = test['Close'][numOfDays:]
    preds = model.predict(x_test).squeeze()
    print('MAE: ', mean_absolute_error(preds, y_test))
    # the model predicts relative changes, so rebuild prices from the
    # close numOfDays earlier
    preds = test['Close'].values[:-numOfDays] * (preds + 1)
    preds = pd.Series(index=targets.index, data=preds)
    line_plot(targets, preds, 'actual', 'prediction', lw=3)
    line_plot(history.history['loss'], history.history['val_loss'],
              'train loss', 'test loss', lw=3)
def train():
    # parameters for NN-training
    end_epoch = 20
    batch_size = 16
    n_workers = 8
    # use gpu (if available) otherwise cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Creating Model')
    # copy the NN-model to GPU/CPU
    net = Net().to(device)
    # load dataset
    train_ds = Data(is_train=True)  # read the data and put it into memory
    train_dl = torch.utils.data.DataLoader(train_ds,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=n_workers)
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    criterion = Loss()
    plot_dict = {
        'total': [],
        'shape': [],
        'color': [],
        'coord': [],
    }
    print('Started Training')
    for epoch in range(end_epoch):
        losses = AverageMeter()
        bar = Bar(f'Epoch {epoch + 1}/{end_epoch}', fill='#', max=len(train_dl))
        for i, data in enumerate(train_dl, 0):
            img, shape, color, coords, _ = data
            img = img.to(device)
            shape = shape.to(device)
            color = color.to(device)
            coords = coords.to(device)
            optimizer.zero_grad()
            outputs = net(img)
            labels = [shape, color, coords]
            loss, loss_dict = criterion(outputs, labels)
            losses.update(loss.item(), img.size(0))
            loss.backward()
            optimizer.step()
            plot_dict['total'].append(loss.item())
            for k, v in loss_dict.items():
                plot_dict[k].append(v.item())
            summary_string = f'({i + 1}/{len(train_dl)}) | Total: {bar.elapsed_td} | ' \
                             f'ETA: {bar.eta_td:} | loss: {losses.avg:.4f}'
            for k, v in loss_dict.items():
                summary_string += f' | {k}: {v:.4f}'
            bar.suffix = summary_string
            bar.next()
        bar.finish()
    print('Finished Training')
    print(f'loss_avg: {losses.avg:.4f}')
    print('Saving model')
    save_plots(plot_dict)
    torch.save(net.state_dict(), MODEL_PATH)
def test():
    test_ds = Data(is_train=False)
    test_dl = torch.utils.data.DataLoader(test_ds,
                                          batch_size=1,
                                          shuffle=True,
                                          num_workers=1)
    print('Creating Model')
    net = Net()
    net.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
    # use gpu (if available) otherwise cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = net.to(device).eval()
    result_filename = 'predicted_test_result.csv'
    write_to_file(text='filename,logo-name,x,y,color', file=result_filename)
    color_acc, shape_acc, coord_err = [], [], []
    print('Started Testing')
    with torch.no_grad():
        bar = Bar(f'Test', fill='#', max=len(test_dl))
        for i, data in enumerate(test_dl, 0):
            img, shape, color, coord, fname = data
            img = img.to(device)
            shape = shape.to(device)
            color = color.to(device)
            coord = coord.to(device) * 128
            output = net(img)
            pshape, pcolor, pcoord = output
            _, pshape = torch.max(pshape, 1)
            _, pcolor = torch.max(pcolor, 1)
            pcoord *= 128
            img = img[0]
            pshape, pcolor, pcoord = pshape[0], pcolor[0], pcoord[0]
            tshape, tcolor, tcoord = shape[0], color[0], coord[0]
            pcoord = pcoord.cpu().numpy()
            tcoord = tcoord.cpu().numpy()
            pshape, tshape = label2shape[int(pshape)], label2shape[int(tshape)]
            pcolor, tcolor = label2color[int(pcolor)], label2color[int(tcolor)]
            color_acc.append(pcolor == tcolor)
            shape_acc.append(pshape == tshape)
            coord_err.append(np.absolute(pcoord - tcoord))
            # title for sample images
            title = f'Predicted shape: {pshape}, color: {pcolor}, ' \
                    f'coord: ({int(pcoord[1].round())}/{int(pcoord[0].round())})\n'
            title += f'True shape: {tshape}, color: {tcolor}, ' \
                     f'coord: ({int(tcoord[1])}/{int(tcoord[0])})'
            # print 20 sample images
            if i < 20:
                plt.title(title, y=1.05)
                imshow(img, idx=i)
            # write to csv and then to xlsx
            fname = fname[0]
            write_to_file(
                f'{fname},'
                f'{pshape},'
                f'{int(pcoord[1].round())},'
                f'{int(pcoord[0].round())},'
                f'{pcolor}', result_filename)
            summary_string = f'Total: {bar.elapsed_td} | ETA: {bar.eta_td:}'
            bar.suffix = summary_string
            bar.next()
        bar.finish()
    read_file = pd.read_csv(result_filename)
    read_file.to_excel('predicted_test_result.xlsx', index=None, header=True)
    # mean error (x, y): column 0 holds y errors, column 1 holds x errors
    coord_err = np.array(coord_err)
    mean_error_y = coord_err[:, 0].mean()
    mean_error_x = coord_err[:, 1].mean()
    # std error (x, y)
    std_error_y = coord_err[:, 0].std()
    std_error_x = coord_err[:, 1].std()
    print(f'Mean pixel error x: {mean_error_x:.4f}, y: {mean_error_y:.4f}')
    print(f'Std of error x: {std_error_x:.4f}, y: {std_error_y:.4f}')
    # color accuracy
    color_acc = np.array(color_acc)
    color_acc = (color_acc.sum() / color_acc.shape[0]) * 100
    print(f'Color accuracy: {color_acc:.4f}')
    # shape accuracy
    shape_acc = np.array(shape_acc)
    shape_acc = (shape_acc.sum() / shape_acc.shape[0]) * 100
    print(f'Shape accuracy: {shape_acc:.4f}')
def main():
    data = Data()
    logistic_regression = models.LogisticRegression()
    neural_network = models.NeuralNet()
    svm = models.SupportVectorMachine(C=1.0, kernel='rbf', gamma='scale')
    random_forest = models.RandomForest(n_estimators=100, max_depth=None,
                                        random_state=None)
    # Process dataset
    (training_data_features, training_data_labels,
     mnist_test_data_features, mnist_test_data_labels,
     usps_test_data_features, usps_test_data_labels,
     combined_test_data_features, combined_test_data_labels) = data.pre_process()
    # Logistic Regression
    logistic_regression.fit(training_data_features, training_data_labels,
                            learning_rate=0.01, epochs=500)
    accuracy_mnist, confusion_mnist = logistic_regression.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = logistic_regression.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = logistic_regression.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Logistic Regression', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)
    # Neural Network
    neural_network.fit(training_data_features, training_data_labels, epochs=10)
    accuracy_mnist, confusion_mnist = neural_network.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = neural_network.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = neural_network.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Neural Network', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)
    # Support Vector Machine
    svm.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = svm.predict(mnist_test_data_features,
                                                  mnist_test_data_labels)
    accuracy_usps, confusion_usps = svm.predict(usps_test_data_features,
                                                usps_test_data_labels)
    accuracy_combined, confusion_combined = svm.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('SVM', accuracy_mnist, accuracy_usps, accuracy_combined,
                   confusion_mnist, confusion_usps, confusion_combined)
    # Random Forest
    random_forest.fit(training_data_features, training_data_labels)
    accuracy_mnist, confusion_mnist = random_forest.predict(
        mnist_test_data_features, mnist_test_data_labels)
    accuracy_usps, confusion_usps = random_forest.predict(
        usps_test_data_features, usps_test_data_labels)
    accuracy_combined, confusion_combined = random_forest.predict(
        combined_test_data_features, combined_test_data_labels)
    print_and_plot('Random Forest', accuracy_mnist, accuracy_usps,
                   accuracy_combined, confusion_mnist, confusion_usps,
                   confusion_combined)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = LabelSmoothCELoss(reduction='none')
skf = KFold(n_splits=3, shuffle=True, random_state=47)
total_val = 0
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df), 1):
    train_writer = SummaryWriter(
        log_dir=os.path.join('tbx_log', current_time, str(fold), 'train'))
    val_writer = SummaryWriter(
        log_dir=os.path.join('tbx_log', current_time, str(fold), 'val'))
    best_val = []
    print('=' * 20, 'Fold', fold, '=' * 20)
    model = EfficientNet.from_pretrained(args.model, num_classes=4)
    # model = pretrainedmodels.se_resnext50_32x4d(num_classes=1000, pretrained='imagenet')
    # model.last_linear = nn.Linear(2048, 3)
    model = model.to(device)
    train_set = Data(train_df.iloc[train_idx].reset_index(drop=True), train_transform)
    val_set = Data(train_df.iloc[val_idx].reset_index(drop=True), test_transform)
    train_loader = DataLoader(dataset=train_set, batch_size=args.bs, shuffle=True)
    val_loader = DataLoader(dataset=val_set, batch_size=16, shuffle=False)
    optim = torch.optim.Adam(model.parameters(), lr=0.0005)
    # optim = SWA(base_optim, swa_start=770, swa_freq=77, swa_lr=0.0001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optim, T_max=args.epoch + 5, eta_min=5e-6)
    # scheduler_warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=scheduler)
    for epoch in range(1, args.epoch + 1):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
import torch
import loadModel
from dataset import Data
import Criterion

classifier = loadModel.load()
# device = 'cpu'
device = 'cuda:0'
trainData = Data(test=False)
criterion = Criterion.Criterion()
batch_size = 32
epochs = 200
alpha = 1e-2
momentum = 0.9
for epoch in range(epochs):
    # learning-rate schedule: decay alpha at epochs 60 and 100
    if epoch == 60:
        alpha = 5e-3
    elif epoch == 100:
        alpha = 1e-3
    correct = 0
    count = 0
    for i in range(0, trainData.m, batch_size):
        # print(i)
        X, y = trainData.sample(batch_size, i)
        classifier.clearGradParam()
        y_pred = classifier.forward(X)
        # print(y_pred)