def __init__(self, vocab_fname):
    with open(vocab_fname) as f:
        vocab = json.load(f)
    self.tgt_itos = vocab['tgt_vocab']['itos']
    self.input_stoi = vocab['input_vocab']['stoi']
    self.ent_stoi = vocab['ent_vocab']['stoi']
    self.dataset = Dataset()
def train_model_main(option):
    # load dataset
    dataset = Dataset(DATABASE_ROOT_DIR)
    dataset.main()
    # build model
    VGG_16 = Model()
    VGG_16.build_model(dataset)
    train_vgg = Train(VGG_16)
    train_vgg.setCallbacks(option)
    global MODEL_PATH
    if option == 'DataShuffleSplit':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "ShuffleSplit_model.h5"))
        train_vgg.train_with_SplitedData(dataset)
        VGG_16.evaluate_model(train_vgg.test_image, train_vgg.test_label)
        VGG_16.save_model(MODEL_PATH)
    elif option == 'KFoldM':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "KFold_Manual_model.h5"))
        train_vgg.train_with_CrossValidation_Manual(dataset)
        VGG_16.save_model(MODEL_PATH)
    elif option == 'KFoldW':
        MODEL_PATH = os.path.abspath(os.path.join(MODEL_PATH, "KFold_Wrapper_model.h5"))
        train_vgg.train_with_CrossValidation_Wrapper(dataset)
        VGG_16.save_model(MODEL_PATH)
    elif option == 'GridSearch':
        print("[WARNING!!!] THIS mode is not available!")
        exit(0)
        train_vgg.train_with_GridSearchCV(dataset)
    elif option == 'help':
        print_usage(sys.argv[0])
    else:
        print_usage(sys.argv[0])
print('{0} = {1}'.format(arg, getattr(args, arg)))

torch.manual_seed(args.seed)
# train on the first GPU if available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Training on device = {}'.format(device))

"""
===========================================================================
Loading data
===========================================================================
"""
data = Data(path=args.data_path, dataset=args.dataset, split=args.split)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(
    args.dataset, data.n_node, data.n_edge))
feature = data.feature.to(device)
label = data.label.to(device)

train = Dataset(data.idx_train)
val = Dataset(data.idx_val)
test = Dataset(data.idx_test)
train_loader = DataLoader(dataset=train, batch_size=args.batch_size)
val_loader = DataLoader(dataset=val, batch_size=args.batch_size)
test_loader = DataLoader(dataset=test, batch_size=args.batch_size)
sampler = Sampler(data.adj, args.aggregator)

"""
===========================================================================
Training
===========================================================================
"""
model = SupervisedGraphSAGE(n_feature=data.n_feature,
                            n_hidden=args.hidden,
                            n_class=data.n_class,
                            agg_type=args.aggregator,
import random

import matplotlib.pyplot as plt
import torch
import torch.nn as nn

from load_data import Dataset, TestDataset
from model import Net

batch_size = 1
n_iter = 150
lr = 0.001
random_seed = 60
save_path = 'save_model/'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(random_seed)

dataset = Dataset()
loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
print('dataset: {}'.format(len(dataset)))

# candidate loss functions
mse = nn.MSELoss()
bce = nn.BCELoss()
L1_loss = nn.L1Loss()

load_model_path = 'save_model/epoch150_loss_35.7894.pth.tar'
print('==> Building model...')
net = Net()
net.load_state_dict(torch.load(load_model_path))
criterion = mse
    lambda row: normalize(row, maximums, minimums), axis=1)
print(data.columns)

for month in range(1, 13):
    for day in range(1, 32):
        data_by_day = data[data['month'] == month]
        data_by_day = data_by_day[data_by_day['day'] == day]
        if len(data_by_day) > 0:
            by_day_y.append(
                statistics.mean(data_by_day['normalized_volume']))
            by_day_x.append(f'{month}-{day}')

plt.plot(by_day_x, by_day_y)

print('weak days')
for day, mean_volume in zip(by_day_x, by_day_y):
    if mean_volume < 0.35:
        print(day)

print('peak days')
for day, mean_volume in zip(by_day_x, by_day_y):
    if mean_volume > 0.55:
        print(day)

plt.show()

dataset = Dataset(file_name)
model = BaselineModel(dataset, {})
for year in range(2002, 2017, 2):
    predictions = model.predict(f'{year}-01-01', f'{year+1}-12-31')
    gold = dataset.get_subset(f'{year}-01-01', f'{year+1}-12-31')['Volume']
    mse, r2 = evaluate(gold, predictions)
    print(f'{year}, {year+1}: MSE: {mse}, R2: {r2}')
                test += 1
                aux_clusters[curr_indx].remove(k)
                aux_clusters[pred_idx].append(k)
                self.obj_to_cluster[k] = pred_idx
        self.clusters = aux_clusters
        return test

    def objective_function(self):
        print("calculating J_kcm_f_gh...")
        result = 0.
        self.part2 = self.__all_against_all_cluster_sum()
        for i in range(self.c):
            for k in self.clusters[i]:
                result += 1 - self.__object_against_cluster_sum(k, i) + self.part2[i]
        return result

    def rand_score(self):
        return adjusted_rand_score(labels_true=self.y,
                                   labels_pred=list(self.obj_to_cluster.values()))


if __name__ == "__main__":
    import pandas as pd
    from load_data import Dataset

    datadir = '../../data/segmentation_2.test'
    df = pd.read_csv(datadir, sep=',')
    mydata = Dataset()
    mydata.load(df, 'rgb')
    kcm = KCM_F_GH(c=7, p=mydata.X.values.shape[1], data=mydata)
print(args)
torch.manual_seed(args.seed)
device = torch.device("cuda")

if not os.path.exists('./ckpt/'):
    os.makedirs('./ckpt/')
if not os.path.exists('./iter_num/' + args.model_name):
    os.makedirs('./iter_num/' + args.model_name)
if not os.path.exists('./logs/' + args.model_name):
    os.makedirs('./logs/' + args.model_name)
if not os.path.exists('./labels/' + args.model_name):
    os.makedirs('./labels/' + args.model_name)
if not os.path.exists('./c/'):
    os.makedirs('./c/')

dataset = Dataset(args)
change_itr = range(8000, 100000, 4000)
logger = Logger('./logs/' + args.model_name)

if args.env_name == 'bimgame':
    model = ConvModel(3, args.num_subgoals, use_rnn=False).to(device)
else:
    model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

start_itr = 0
c = []
if args.one_class:
    if args.pretrained_ckpt is not None:
        model.load_state_dict(
            torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
        start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
        c = torch.from_numpy(
print('Training on device = {}'.format(device))

"""
===========================================================================
Loading data
===========================================================================
"""
data = Data(path=args.data_path, dataset=args.dataset, split=args.split)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(
    args.dataset, data.n_node, data.n_edge))
feature = data.feature.to(device)
norm_adj = data.norm_adj.to(device)
label = data.label.to(device)
label_train = label[data.idx_train]
label_val = label[data.idx_val]
label_test = label[data.idx_test]

train = Dataset(torch.arange(len(data.idx_train)))
train_loader = DataLoader(dataset=train, batch_size=args.batch_size)
sampler = Sampler(data.feature[data.idx_train], data.norm_adj_train, args.sample)

"""
===========================================================================
Training
===========================================================================
"""
# Model and optimizer
model = FastGCN(n_feature=data.n_feature,
                n_hidden=args.hidden,
                n_class=data.n_class,
                dropout=args.dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
metric = torchmetrics.Accuracy().to(device)

for epoch in range(1, args.epoch + 1):
    t = time.time()
from load_data import Dataset
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from create_model import create_model

data = Dataset()
data_train, data_test, labels_train, labels_test = train_test_split(
    data.dataset, data.labels, test_size=0.20, random_state=42)

class_names = ["Not fire", "Fire"]

plt.figure()
plt.imshow(data_train[1])
plt.colorbar()
plt.grid(False)
plt.show()

data_train = data_train / 255
data_test = data_test / 255

plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(data_train[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[labels_train[i]])
plt.show()
from load_data import Dataset
from policy import Network

d = Dataset()
d.prepare("/tftpboot/cv/data2/")

n = Network()
n.initialize_variables("./saved_network/")

print("dataset length = ", d.length)

for i in range(100000):
    traindata = d.getdata(32)
    n.train(traindata, i)

n.save_variables("./saved_network/test", 1)
    else:
        device_id = torch.cuda.current_device()
        print('using device', device_id, torch.cuda.get_device_name(device_id))

device = torch.device("cuda")
print('DEVICE:', device)

if __name__ == '__main__':
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # load data
    ds = args.dataSet
    data = Dataset(dataset='cora', path='../GraphSAGE/')
    data.load_data()
    feat = torch.FloatTensor(data.cora_feats).to(device)
    num_labels = len(set(getattr(data, ds + '_labels')))

    graphSage = GraphSage(2, feat.size(1), 128, feat,
                          getattr(data, ds + '_adj_lists'), device,
                          gcn=args.gcn, agg_func=args.agg_func).to(device)
    classification = Classification(128, num_labels).to(device)
parser.add_argument('--hidden_size', type=int, default=300,
                    help='size of hidden tensor')
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--wdecay', type=float, default=1e-8)
parser.add_argument('--cuda', default=0, type=int)
parser.add_argument('--eval', action='store_true', help='evaluate model')
args = parser.parse_args()

sys.stdout = open('train.log', 'w+')

# create a corpus to save time
if args.create_corpus == 1:
    print("Creating New Corpus")
    corpus = Dataset()
    corpus_save_path = "nyt_corpus.pkl"
    with open(corpus_save_path, 'wb') as output:
        pickle.dump(corpus, output, pickle.HIGHEST_PROTOCOL)
    print("Number of training samples = ", len(corpus.train_sentences))
    print("Number of testing samples = ", len(corpus.test_sentences))
    assert len(corpus.train_sentences) == len(corpus.train_labels)
    assert len(corpus.test_sentences) == len(corpus.test_labels)
else:
    print("Loading saved Corpus")
    corpus_save_path = "nyt_corpus.pkl"
    with open(corpus_save_path, 'rb') as input_:
        corpus = pickle.load(input_)
    print("Number of training samples = ", len(corpus.train_sentences))
    print("Number of testing samples = ", len(corpus.test_sentences))
if __name__ == '__main__':
    datadir = '../../data/segmentation_2.test'
    result_dir = '../../results/clustering'
    result_file = 'results'
    bresult_file = 'best_result'
    view = 'rgb'
    norm = True
    result_file = '{}{}_{}'.format(view, '_norm' if norm else '', result_file)
    bresult_file = '{}{}_{}'.format(view, '_norm' if norm else '', bresult_file)

    df = pd.read_csv(datadir, sep=',')
    mydata = Dataset()
    mydata.load(df, view)

    # variables to store the results of each run of the algorithm
    res_obj_function = []
    res_cluster = []
    res_obj_to_cluster = []
    res_hp = []
    res_ari = []  # adjusted rand indexes list
    res_J = []  # the best convergence series of J across runs

    # run the algorithm repeatedly
    for epoch in range(5):
        start_total_time = time.time()
        # initialize the algorithm
        kcm = KCM_F_GH(c=7, p=mydata.X.shape[1], data=mydata, norm=norm)
        kcm.initialization()
class Model:
    current_dir = os.path.dirname(os.path.realpath(__file__))

    def __init__(self, datadir, save_dir=current_dir, **kwargs):
        self.datadir = datadir
        self.save_dir = save_dir
        self.gpu = False
        for k, v in kwargs.items():
            setattr(self, k, v)
        '''
        Defaults are:
            arch=vgg13, learning_rate=0.01, hidden_units=512,
            epochs=20, gpu=False
        '''
        self.device = self.setDevice()
        self.dataset = DS(self.datadir)
        self.dataset.transform()
        self.trainloader, self.validloader, self.testloader = self.dataset.init_loaders()

    def __str__(self):
        return '{0.__class__.__name__}:(\n\tarch={0.arch}\n ' \
               '\tlearning_rate={0.learning_rate}\n' \
               '\thidden_units={0.hidden_units}\n' \
               '\tepochs={0.epochs}\n' \
               '\tsave_dir={0.save_dir}\n' \
               '\tgpu={0.gpu})\n' \
               '\tdevice={0.device}\n'.format(self)

    # Use GPU if it's available
    def setDevice(self):
        if self.gpu and torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        print('device set to {}'.format(self.device))
        return self.device

    def setModel(self, arch):
        print("\nsetting up model...\n")
        # alexnet = models.alexnet(pretrained=True)
        # squeezenet = models.squeezenet1_0(pretrained=True)
        # vgg11 = models.vgg11(pretrained=True)
        # vgg13 = models.vgg13(pretrained=True)
        # vgg16 = models.vgg16(pretrained=True)
        # vgg19 = models.vgg19(pretrained=True)
        # densenet = models.densenet161(pretrained=True)
        # inception = models.inception_v3(pretrained=True)
        # googlenet = models.googlenet(pretrained=False)
        # shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
        # mobilenet = models.mobilenet_v2(pretrained=True)
        # resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
        switcher = {
            'alexnet': models.alexnet(pretrained=True),
            'squeezenet': models.squeezenet1_0(pretrained=True),
            'inception': models.inception_v3(pretrained=True),
            #'googlenet': models.googlenet(pretrained=False),
            #'mobilenet': models.mobilenet_v2(pretrained=True),
            #'resnext50_32x4d': models.resnext50_32x4d(pretrained=True),
            'vgg11': models.vgg11(pretrained=True),
            'vgg13': models.vgg13(pretrained=True),
            'vgg16': models.vgg16(pretrained=True),
        }
        error = ("\nThat model is not supported yet. The supported models are: "
                 "'alexnet', 'squeezenet', 'vgg11', 'vgg13', 'vgg16', 'vgg19', "
                 "'inception', 'googlenet', 'mobilenet', 'resnext50_32x4d'")
        self.model = switcher.get(arch, error)
        if self.model == error:
            print(error)
        else:
            print('\nmodel successfully set to {}'.format(arch))

    # Freeze feature-extractor parameters so we don't backprop through them
    def create_classifier(self):
        print("\ncreating classifier...")
        for param in self.model.parameters():
            param.requires_grad = False
        self.model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4000),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(4000, 1280),
            nn.Linear(1280, self.hidden_units),
            nn.Linear(self.hidden_units, 102),
            nn.LogSoftmax(dim=1))
        self.criterion = nn.NLLLoss()
        # Only train the classifier parameters, feature parameters are frozen
        self.optimizer = optim.Adam(self.model.classifier.parameters(), lr=0.003)
        #optimizer.zero_grad()
        # Sanity check: run one validation batch through the new classifier
        images, labels = next(iter(self.validloader))
        ps = torch.exp(self.model(images))
        #print("shape should be [64, 102]", ps.shape)
        top_p, top_class = ps.topk(1, dim=1)
        print(top_class[:10, :])
        equals = top_class == labels.view(*top_class.shape)
        accuracy = torch.mean(equals.type(torch.FloatTensor))
        print(f'Accuracy: {accuracy.item() * 100}%')
        print('\ncheck results to see if classifier is configured correctly.')

    def train_model(self):
        device = self.setDevice()
        self.model.to(self.device)
        print('\ntraining {} on {}, for {} epochs. Optimizer learning rate set to {}...'
              .format(self.arch, self.device, self.epochs, self.learning_rate))
        epochs = self.epochs
        steps = 0
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(self.model.classifier.parameters(), lr=self.learning_rate)
        train_losses, test_losses = [], []
        for epoch in range(epochs):
            running_loss = 0
            for inputs, labels in self.trainloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                logps = self.model.forward(inputs)
                loss = criterion(logps, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            else:
                # Validation pass: compute validation loss and accuracy
                test_loss = 0
                accuracy = 0
                # Turn off gradients for validation, saves memory and computation
                with torch.no_grad():
                    for images, labels in self.validloader:
                        images, labels = images.to(self.device), labels.to(self.device)
                        log_ps = self.model(images)
                        batch_loss = criterion(log_ps, labels)
                        test_loss += batch_loss.item()
                        ps = torch.exp(log_ps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
                train_losses.append(running_loss / len(self.trainloader))
                test_losses.append(test_loss / len(self.validloader))
                print("Epoch: {}/{}.. ".format(epoch + 1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss / len(self.trainloader)),
                      "Test Loss: {:.3f}.. ".format(test_loss / len(self.validloader)),
                      "Test Accuracy: {:.3f}".format(accuracy / len(self.validloader)))

    def validate_model(self):
        test_loss = 0
        accuracy = 0
        test_losses = []
        # Turn off gradients for validation, saves memory and computation
        with torch.no_grad():
            for images, labels in self.testloader:
                images, labels = images.to(self.device), labels.to(self.device)
                log_ps = self.model(images)
                batch_loss = self.criterion(log_ps, labels)
                test_loss += batch_loss.item()
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
        test_losses.append(test_loss / len(self.testloader))
        print("Test Loss: {:.3f}.. ".format(test_loss / len(self.testloader)),
              "Test Accuracy: {:.3f}".format(accuracy / len(self.testloader)))

    def save_model_checkpoint(self):
        print("\nOur model: \n\n", self.model, '\n')
        self.model.epochs = self.epochs
        self.model.class_to_idx = self.trainloader.dataset.class_to_idx
        print('model.epochs: ', self.model.epochs)
        print("The state dict keys: \n\n", self.model.state_dict().keys())
        checkpoint_out = self.save_dir + '/' + 'checkpoint2.pth'
        checkpoint = {
            'input_size': [3, 224, 224],
            'output_size': 102,
            'arch': self.arch,
            'state_dict': self.model.state_dict(),
            'epoch': self.model.epochs,
            'class_to_idx': self.model.class_to_idx
        }
        print('\n\nsaving to {}.'.format(checkpoint_out))
        try:
            torch.save(checkpoint, checkpoint_out)
        except Exception:
            print('Checkpoint did not save.')
        else:
            print('Checkpoint successful.')
from load_data import Dataset
import numpy as np
import os

np.random.seed(1729)

if __name__ == '__main__':
    data = Dataset(os.getcwd())

    """MNIST params"""
    num_examples = 60000
    inp_shape = 784
    num_classes = 10

    assert (data.num_examples() == num_examples)
    assert (data.inp_shape() == inp_shape)
    assert (data.num_classes() == num_classes)

    for _ in range(10000):
        batch_size = np.random.randint(low=1, high=num_examples)
        batch_x, batch_y = data.next_batch(batch_size)
        assert (batch_x.shape[0] == batch_size)
        assert (batch_y.shape[0] == batch_size)
        assert (batch_x.shape[1] == inp_shape)