def test_inference(model, test_data_path):
    ds_test = create_dataset(test_data_path).create_dict_iterator()
    data = next(ds_test)
    images = data["image"].asnumpy()
    labels = data["label"].asnumpy()
    output = model.predict(Tensor(data["image"]))
    pred = numpy.argmax(output.asnumpy(), axis=1)
    err_num = []
    index = 1
    for i in range(len(labels)):
        plt.subplot(4, 8, i + 1)
        color = 'blue' if pred[i] == labels[i] else 'red'
        plt.title("pre:{}".format(pred[i]), color=color)
        plt.imshow(numpy.squeeze(images[i]))
        plt.axis("off")
        if color == 'red':
            index = 0
            print(
                "Row {}, column {} is incorrectly identified as {}, the correct value should be {}"
                .format(int(i / 8) + 1, i % 8 + 1, pred[i], labels[i]), '\n')
    if index:
        print("All the figures in this group are predicted correctly")
    print(pred, "<--Predicted figures")
    print(labels, "<--The right number")
    plt.show()
def make_ds(Filenames, context, zero_delimiter=False):
    X, Y = 0, 0
    if zero_delimiter:
        xdel = np.zeros((1, context, 257))
        ydel = np.zeros((1, context, 257 * 5))
    for fp in Filenames:
        real_wiener_all, wiener_all = readmat(fp, read='inout', keys=('Input', 'Output'))
        dslength = len(wiener_all)
        X_ = np.zeros((dslength, 1, 257 * 5))
        Y_ = np.zeros((dslength, 1, 257))
        for i in range(np.size(X_, 0)):
            x = wiener_all[i].reshape((1, 257 * 5))
            # x = np.array([max(x[0][i], -35) for i in range(1285)])
            X_[i][0] = x
        for i in range(np.size(Y_, 0)):
            y = real_wiener_all[i].reshape((1, 257))
            # y = np.array([max(y[0][i], -35) for i in range(257)])
            Y_[i][0] = y
        if context:
            X_, Y_ = create_dataset(X_, Y_, context)
            X_ = np.squeeze(X_)
            Y_ = np.squeeze(Y_)
        if zero_delimiter:
            newX_ = np.zeros((len(X_) + 1, context, 257 * 5))
            newY_ = np.zeros((len(Y_) + 1, context, 257))
            newX_[1:len(X_) + 1, :, :] = X_
            newY_[1:len(Y_) + 1, :, :] = Y_
            X_ = newX_
            Y_ = newY_
        if type(X) != type(X_):
            X = X_
            Y = Y_
        else:
            X = np.concatenate((X, X_))
            Y = np.concatenate((Y, Y_))
        print(X.shape)
    return X, Y
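# A minimal sketch, under assumption, of the `create_dataset(X_, Y_, context)` call used in
# make_ds above: it appears to group every `context` consecutive frames of the paired input
# and target arrays into one window; the real helper may window or stride differently.
import numpy as np

def create_dataset_context_sketch(X, Y, context):
    """Hypothetical grouping: stack `context` consecutive frames of X and Y into windows."""
    n_windows = len(X) - context + 1
    # X has shape (n_frames, 1, feat_x); each window keeps `context` consecutive frames
    Xw = np.stack([X[i:i + context, 0, :] for i in range(n_windows)])
    Yw = np.stack([Y[i:i + context, 0, :] for i in range(n_windows)])
    return Xw, Yw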
def test_net(network, model, mnist_path):
    """Define the evaluation method"""
    print("==================== Starting Testing ===============")
    param_dict = load_checkpoint(
        "./model/ckpt/mindspore_quick_start/checkpoint_lenet-1_1874.ckpt")
    load_param_into_net(network, param_dict)
    ds_eval = create_dataset(os.path.join(mnist_path, "test"))
    acc = model.eval(ds_eval, dataset_sink_mode=False)
    print("==================== Accuracy:{} ===============".format(acc))
def create_datasets():
    X_train, Y_train, X_test, Y_test = cr.create_dataset()
    Y_train = Y_train.reshape(-1, 3, 32, 32)
    X_train = X_train.reshape(-1, 3, 32, 32)
    Y_test = Y_test.reshape(-1, 3, 32, 32)
    X_test = X_test.reshape(-1, 3, 32, 32)
    return X_train.astype(np.float32), Y_train.astype(np.float32), \
        X_test.astype(np.float32), Y_test.astype(np.float32)
def train_and_eval_crf(data_file, save_file, save=True):
    '''Train and evaluate a CRF model. Option to save F1 Score.'''
    x, y = create_dataset(data_file, "CRF")
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)
    crf = train_crf(x_train, y_train)
    f1 = eval_crf(crf, x_test, y_test)
    if save:
        desc = "Simple CRF Model"
        note = "None"
        save_f1(save_file, f1, len(x), desc, note)
    return
def __init__(self, dataset_name="dataset_hrl.pk"):
    # Dataset = create_dataset()
    # pickle.dump(Dataset, open("dataset_hrl.pk", "wb"))
    import pickle
    if os.path.exists(dataset_name):
        self.data = pickle.load(open(dataset_name, "rb"))
    else:
        from create_dataset import create_dataset
        self.data = create_dataset()
        pickle.dump(self.data, open(dataset_name, "wb"))
    self.index = 0
    self.dataset_size = len(self.data)
def train_and_eval_BiLSTM(data_file, save_file, save=True, embed_size=100,
                          epochs=50, batch_size=32, val_size=0.1):
    '''Train and evaluate a Keras BiLSTM Model. Option to save F1 Score.'''
    x, y, word_ids, tag_ids = create_dataset(data_file, "LSTM")
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)
    custom_emb = Word2Vec.load("./data/word2vec_numeric_encs.model")
    embed_matrix, _ = create_weight_matrix(word_ids, custom_emb)
    model = create_BiLSTM(len(word_ids), len(tag_ids), embed_size, 50, embed_matrix)
    history = train_BiLSTM(model, x_train, y_train, batch_size, epochs, val_size)
    f1 = eval_BiLSTM(model, x_test, y_test, tag_ids)
    if save:
        desc = f"BiLSTM-EmbedSize-{embed_size}-Word2Vec"
        note = "Word2Vec Embeddings"
        save_f1(save_file, f1, len(x), desc, note)
    return
def __init__(self, filenames):
    """
    constructor to create a sequenceModel object
    identify the number of records in the dataset
    """
    print("\n", "+" * 10, filenames, "+" * 10, "\n")
    tf.reset_default_graph()
    with tf.name_scope('seqModel'):
        # get number of records in the dataset
        # self.num_records = sum(1 for _ in tf.data.TFRecordDataset(filenames))
        # tf.disable_eager_execution()

        # create a dataset iterator to pass to the model
        next_element, dataset_init_op = create_dataset(filenames)
        # use iterator generator to create data variables
        self.createDataIterators(next_element)
        self.createParams()
        self.dataset_init_op = dataset_init_op
        self.sess = tf.Session()
def create_model_metadata():
    aid = create_architecture.create_architecture()
    did = create_dataset.create_dataset()
    url = 'http://localhost:8080/api/v1/model'
    model = {
        "is_public": True,
        "title": "Test Model {id}".format(id=int(time.time()) % 1000),
        "description": "Test test test",
        "labels": ["a", "b", "c"],
        "architecture": aid,
        "dataset": did
    }
    resp = utils.post(url, json=model)
    print('status:', resp.status_code, 'data:', resp.text)
    if resp.status_code == 200:
        return resp.json()['id']
def prepare(self):
    df = self.get_gcp_dataframe()
    dict_of_image_names_with_its_gcp_cordinates = {}
    pickle_exist = False
    dict_pickle_path = os.path.join('pickle', 'dict.pickle')
    if os.path.exists(dict_pickle_path):
        with open(dict_pickle_path, 'rb') as f:
            dict_of_image_names_with_its_gcp_cordinates = pickle.load(f)
        pickle_exist = True
    for index, row in enumerate(df.iterrows()):
        if pickle_exist:
            break
        print(index)
        image_name: str = df['FileName'].iloc[index]
        gcp_location: str = df['GCPLocation'].iloc[index]
        print(gcp_location, image_name)
        path = os.path.join(self.data_set_path, image_name)
        if not os.path.exists(path):
            with open('not_found.txt', 'a') as the_file:
                the_file.write(path + '\n')
            continue
        dict_of_image_names_with_its_gcp_cordinates = crop_image(
            path, gcp_location, dict_of_image_names_with_its_gcp_cordinates)
    with open(dict_pickle_path, 'wb') as f:
        pickle.dump(dict_of_image_names_with_its_gcp_cordinates, f, pickle.HIGHEST_PROTOCOL)
    data_set, target_set = create_dataset(
        dict_of_image_names_with_its_gcp_cordinates)
    print(target_set[0])
    print(data_set.shape)
    print("loading Model")
    model = ModelClass()
    model.create_model()
    model.train_model(data_set, target_set)
import numpy as np
import pandas as pd

import config
import create_dataset
import MODELS
from keras.callbacks import EarlyStopping, ModelCheckpoint

train_dir = config.dir.train_dir
height = config.standard_vals.height
width = config.standard_vals.width
channels = config.standard_vals.channels

(X_train, X_val, y_train, y_val) = create_dataset.create_dataset(train_dir, height, width, channels)

# creating one hot encoded labels
targets_series = pd.Series(y_train)
one_hot = pd.get_dummies(targets_series, sparse=True)
y_train = np.asarray(one_hot)

targets_series = pd.Series(y_val)
one_hot = pd.get_dummies(targets_series, sparse=True)
y_val = np.asarray(one_hot)

# model = MODELS.inceptionv3(height, width, channels)
model = MODELS.simple_model(height, width, channels)

early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
checkpointer = ModelCheckpoint(filepath='cnnbest.hdf5',
- direction would just be south, east etc. with numbers 0-3 maybe.
- randomly skip some squares to get test set?
"""
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str)
parser.add_argument('--phase', type=str, default="train")
args = parser.parse_args()

model_name = args.model
phase = args.phase
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

amount = 30
if model_name in ['viewpool', 'multiple']:
    image_datasets = create_dataset.create_dataset(multiple=True, amount=amount)
else:
    image_datasets = create_dataset.create_dataset(amount=amount)

dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=8,
                                   shuffle=True,
                                   num_workers=4)
    for x in ['train', 'val', 'test']
}
dataset_sizes = {
    x: len(image_datasets[x])
    for x in ['train', 'val', 'test']
}
def create_d():
    Chromosome().replace_exon()
    os.system('./grab_sequence.sh')
    create_dataset('train', 'all')
# create the network
network = LeNet5()

# define the optimizer
net_opt = nn.Momentum(network.trainable_params(), lr, momentum)

# define the loss function
net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

epoch_size = 5
mnist_path = "./MNIST/"
model_path = "./model/ckpt/mindspore_quick_start/"
repeat_size = 1

ds_train = create_dataset(os.path.join(mnist_path, "train"), 32, repeat_size)
ds_eval = create_dataset(os.path.join(mnist_path, "test"), 32)

# clean up old run files before training (Linux)
os.system('rm -rf {0}*.ckpt {0}*.meta {0}*.pb'.format(model_path))

# define the model
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

# save the network model and parameters for subsequent fine-tuning
config_ck = CheckpointConfig(save_checkpoint_steps=375, keep_checkpoint_max=16)
# group layers into an object with training and evaluation features
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", directory=model_path,
import torch

import models
import create_dataset

if __name__ == '__main__':
    device = 'cuda:0'
    model = models.create_basic_model(device)
    model.load_state_dict(torch.load("models/basic_model.pt"))
    model.to(device)
    model.eval()

    image_datasets = create_dataset.create_dataset(False, 20)
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                  batch_size=1,
                                                  shuffle=True,
                                                  num_workers=1)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

    preds = []
    gts = []
    for im, trgt in dataloaders['test']:
        pred = torch.argmax(model(im.to(device)))
        preds.append(pred.to('cpu'))
        gts.append(trgt)
    preds = torch.Tensor(preds)
def cnn_categorization(model_type="base",
                       data_path="image_categorization_dataset.pt",
                       contrast_normalization=False, whiten=False):
    """
    Invokes the dataset creation, the model construction and training functions

    Arguments
    --------
    model_type: (string), the type of model to train. Use 'base' for the base model
        and 'improved' for the improved model. Default: base
    data_path: (string), the path to the dataset. This argument will be passed to
        the dataset creation function
    contrast_normalization: (boolean), specifies whether or not to do contrast normalization
    whiten: (boolean), specifies whether or not to whiten the data.
    """
    # Do not change the output path
    # but you can uncomment the exp_dir if you do not want to save the model checkpoints
    output_path = "{}_image_categorization_dataset.pt".format(model_type)
    exp_dir = "./{}_models".format(model_type)

    train_ds, val_ds = create_dataset(data_path, output_path, contrast_normalization, whiten)

    # specify the network architecture and the training policy of the models under
    # the respective blocks
    if model_type == "base":
        # create netspec_opts
        netspec_opts = {
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 8, 1],
            "num_filters": [16, 16, 0, 32, 32, 0, 64, 64, 0, 0, 16],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 1, 1],
            "layer_type": [
                "conv", "bn", "relu", "conv", "bn", "relu",
                "conv", "bn", "relu", "pool", "conv"
            ]
        }
        # create train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.0001,
            "batch_size": 128,
            "momentum": 0.9,
            "num_epochs": 25,
            "step_size": 20,
            "gamma": 0.1
        }
        # create the model based on netspec_opts
        model = cnn_categorization_base(netspec_opts)
    elif model_type == "improved":
        # create netspec_opts
        netspec_opts = {
            "kernel_size": [3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 8, 1],
            "num_filters": [32, 32, 0, 0, 64, 64, 0, 0, 128, 128, 0, 0, 16],
            "stride": [1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 1, 1],
            "layer_type": [
                "conv", "bn", "relu", "pool", "conv", "bn", "relu", "pool",
                "conv", "bn", "relu", "pool", "conv"
            ]
        }
        # create train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.0001,
            "batch_size": 128,
            "momentum": 0.9,
            "num_epochs": 25,
            "step_size": 20,
            "gamma": 0.1
        }
        # create the improved model
        model = cnn_categorization_improved(netspec_opts)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # uncomment the line below if you wish to resume training of a saved model
    # model.load_state_dict(load(PATH to state))

    # train the model
    train(model, train_ds, val_ds, train_opts, exp_dir)

    # save model's state and architecture to the base directory
    state_dictionary_path = f"{model_type}_state_dict.pt"
    save(model.state_dict(), state_dictionary_path)
    model = {"state": state_dictionary_path, "specs": netspec_opts}
    save(model, "{}-model.pt".format(model_type))

    plt.savefig(f"{model_type}-categorization.png")
    plt.show()
from create_dataset import create_dataset
from utils import distrib

train, val = create_dataset('semantic_segmentation_dataset.pt')
classcount, rgbmean = distrib(val)
from create_dataset import create_dataset, download_data
import numpy as np

# download_data(num_weeks=100)
dataset = np.loadtxt('dataset.csv', delimiter=',')

# 0~16238
train_dataset = dataset[:-5000]
test_dataset = dataset[-5000:]

trainX, trainY = create_dataset(train_dataset, lookback=24)
testX, testY = create_dataset(test_dataset, lookback=24)

print(trainX.shape)
print(trainY.shape)
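# A minimal sketch of what the `create_dataset(series, lookback)` helper used above might
# look like. The helper itself is not shown in this file; the assumption is the usual
# sliding-window split where each window of `lookback` rows predicts the following row.
import numpy as np

def create_dataset_lookback_sketch(series, lookback=24):
    """Hypothetical splitter: X[i] = rows i..i+lookback-1, Y[i] = row i+lookback."""
    xs, ys = [], []
    for i in range(len(series) - lookback):
        xs.append(series[i:i + lookback])
        ys.append(series[i + lookback])
    return np.array(xs), np.array(ys)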
def semantic_segmentation(model_type="base"):
    """
    sets up and trains a semantic segmentation model

    Arguments
    ---------
    model_type: (String) a string in {'base', 'improved'} specifying the targeted model type
    """
    # the dataset
    train_dl, val_dl = create_dataset("semantic_segmentation_dataset.pt")
    # an optional export directory
    exp_dir = f"{model_type}_models"

    classcount, rgbmean = distrib(train_dl)
    classcount = 1 / classcount
    classcount = classcount.to(device)

    if model_type == "base":
        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", "conv_2", "bn_2", "relu_2",
                "conv_3", "bn_3", "relu_3", "conv_4", "bn_4", "relu_4",
                "conv_5", "upsample_4x", "skip_6", "sum_6", "upsample_2x"
            ],
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 1, 4, 1, 0, 4],
            # Fill filter size for relu and sum as well since skip layers and others use them
            "num_filters": [
                16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128, 128,
                36, 36, 36, 36, 36
            ],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 4, 1, 0, 2],
            "layer_type": [
                'conv', 'bn', 'relu', 'conv', 'bn', 'relu', 'conv', 'bn', 'relu',
                'conv', 'bn', 'relu', 'conv', 'convt', 'skip', 'sum', 'convt'
            ],
            "input": [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, (14, 13), 15],
            "pad": [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
        }
        # specify train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 34,
            "step_size": 30,
            "gamma": 0.1,
            "objective": CrossEntropyLoss()
        }
        model = SemanticSegmentationBase(netspec_opts)
        model.to(device)
    elif model_type == "improved":
        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", "pool_1", "conv_2", "bn_2", "relu_2", "pool_2",
                "conv_3", "bn_3", "relu_3", "pool_3", "conv_4", "bn_4", "relu_4", "drop_1",
                "conv_5", "upsample_4x", "skip_6", "sum_6", "skip_10", "upsample_skip_10",
                "sum_10", "upsample_2x"
            ],
            "kernel_size": [
                3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 0,
                1, 4, 1, 0, 1, 4, 0, 4
            ],
            # Fill filter size for relu and sum as well since skip layers and others use them
            "num_filters": [
                128, 128, 128, 128, 256, 256, 256, 256, 512, 512, 512, 512,
                1024, 1024, 1024, 1024, 36, 36, 36, 36, 36, 36, 36, 36
            ],
            "stride": [
                1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 0,
                1, 4, 1, 0, 1, 2, 0, 2
            ],
            "layer_type": [
                'conv', 'bn', 'relu', 'pool', 'conv', 'bn', 'relu', 'pool',
                'conv', 'bn', 'relu', 'pool', 'conv', 'bn', 'relu', 'drop',
                'conv', 'convt', 'skip', 'sum', 'skip', 'convt', 'sum', 'convt'
            ],
            "input": [
                -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                15, 16, 6, (18, 17), 10, 20, (21, 19), 22
            ],
            "pad": [
                1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
                0, 0, 0, 0, 0, 1, 0, 1
            ]
        }
        # specify train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 12,
            "step_size": [7, 10],
            "gamma": 0.1,
            "objective": CrossEntropyLoss(classcount.float())
        }
        model = SemanticSegmentationImproved(netspec_opts)
        model.to(device)

        # initialise the early convolution layers from a pretrained CNN checkpoint
        CNN_model_params = torch.load('improved_state_dict_CNN(128,256,512).pt')
        model_params = model.state_dict().copy()
        # print(model.state_dict().keys())
        # for p in model.named_parameters():
        #     print(p)
        # i = 0
        # freezelayers = {0, 1, 2, 3, 4, 5}
        # for p in model.named_parameters():
        #     if i in freezelayers:
        #         print(p)
        #     i = i + 1
        model_params['net.conv_1.weight'] = CNN_model_params['conv0.weight']
        model_params['net.conv_1.bias'] = CNN_model_params['conv0.bias']
        model_params['net.bn_1.weight'] = CNN_model_params['bn1.weight']
        model_params['net.bn_1.bias'] = CNN_model_params['bn1.bias']
        # model_params['net.bn_1.running_mean'] = CNN_model_params['bn1.running_mean']
        # model_params['net.bn_1.running_var'] = CNN_model_params['bn1.running_var']
        # model_params['net.bn_1.num_batches_tracked'] = CNN_model_params['bn1.num_batches_tracked']
        model_params['net.conv_2.weight'] = CNN_model_params['conv4.weight']
        model_params['net.conv_2.bias'] = CNN_model_params['conv4.bias']
        # model_params['net.bn_2.weight'] = CNN_model_params['bn5.weight']
        # model_params['net.bn_2.bias'] = CNN_model_params['bn5.bias']
        # model_params['net.bn_2.running_mean'] = CNN_model_params['bn5.running_mean']
        # model_params['net.bn_2.running_var'] = CNN_model_params['bn5.running_var']
        # model_params['net.bn_2.num_batches_tracked'] = CNN_model_params['bn5.num_batches_tracked']
        # model_params['net.conv_3.weight'] = CNN_model_params['conv8.weight']
        # model_params['net.conv_3.bias'] = CNN_model_params['conv8.bias']
        # model_params['net.bn_3.weight'] = CNN_model_params['bn9.weight']
        # model_params['net.bn_3.bias'] = CNN_model_params['bn9.bias']
        # model_params['net.bn_3.running_mean'] = CNN_model_params['bn9.running_mean']
        # model_params['net.bn_3.running_var'] = CNN_model_params['bn9.running_var']
        # model_params['net.bn_3.num_batches_tracked'] = CNN_model_params['bn9.num_batches_tracked']
        model.load_state_dict(model_params)

        # freeze the copied layers
        index = 0
        freezelayers = {0, 1, 2, 3, 4, 5}
        for p in model.parameters():
            if index in freezelayers:
                p.requires_grad = False
            index += 1
        # for p in model.named_parameters():
        #     print(p)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # train the model
    train(model, train_dl, val_dl, train_opts, exp_dir=exp_dir)

    # save model's state and architecture to the base directory
    model = {"state": model.state_dict(), "specs": netspec_opts}
    save(model, f"{model_type}_semantic-model.pt")

    plt.savefig(f"{model_type}_semantic.png")
    plt.show()
def semantic_segmentation(model_type="base"):
    """
    sets up and trains a semantic segmentation model

    Arguments
    ---------
    model_type: (String) a string in {'base', 'improved'} specifying the targeted model type
    """
    # the dataset
    train_dl, val_dl = create_dataset("semantic_segmentation_dataset.pt")
    # an optional export directory
    exp_dir = f"{model_type}_models"

    if model_type == "base":
        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", "conv_2", "bn_2", "relu_2",
                "conv_3", "bn_3", "relu_3", "conv_4", "bn_4", "relu_4",
                "conv_5", "upsample_4x", "skip_6", "sum_6", "upsample_2x"
            ],
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 1, 4, 1, 0, 4],
            # Fill filter size for relu and sum as well since skip layers and others use them
            "num_filters": [
                16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128, 128,
                36, 36, 36, 36, 36
            ],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 4, 1, 0, 2],
            "layer_type": [
                'conv', 'bn', 'relu', 'conv', 'bn', 'relu', 'conv', 'bn', 'relu',
                'conv', 'bn', 'relu', 'conv', 'convt', 'skip', 'sum', 'convt'
            ],
            "input": [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, (14, 13), 15],
            "pad": [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
        }
        # specify train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 34,
            "step_size": 30,
            "gamma": 0.1,
            "objective": CrossEntropyLoss()
        }
        model = SemanticSegmentationBase(netspec_opts)
    # elif model_type == "improved":
    #     # specify netspec_opts
    #     # specify train_opts
    #     model = SemanticSegmentationImproved(netspec_opts)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # train the model
    train(model, train_dl, val_dl, train_opts, exp_dir=exp_dir)

    # save model's state and architecture to the base directory
    model = {"state": model.state_dict(), "specs": netspec_opts}
    save(model, f"{model_type}_semantic-model.pt")

    plt.savefig(f"{model_type}_semantic.png")
    plt.show()
import matplotlib.pyplot as plt

from create_dataset import create_dataset
from best_fit_slope_y_intercept import best_fit_slope_y_intercept
from coefficient_of_determination import coefficient_of_determination

# create a random dataset
xs, ys = create_dataset(50, 30, correlation='pos')

# calculate m and b as per the algorithm
m, b = best_fit_slope_y_intercept(xs, ys)

# get the regression line by applying 'y = mx + b' to every x
regression_line = [(m * x) + b for x in xs]

# predict a value
x_predict = 55
y_predict = m * x_predict + b
print(y_predict)

# calculate the accuracy
r_squared = coefficient_of_determination(ys, regression_line)
print(r_squared)

# plot the points and the regression line
plt.scatter(xs, ys, marker='.')
plt.plot(xs, regression_line)
plt.scatter(x_predict, y_predict, color='red', marker='x')
plt.show()
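# A minimal sketch of the two helpers imported above, assuming the usual ordinary
# least-squares definitions; the real modules may differ in naming or details.
from statistics import mean

def best_fit_slope_y_intercept_sketch(xs, ys):
    """m = (mean(x)*mean(y) - mean(x*y)) / (mean(x)^2 - mean(x^2)); b = mean(y) - m*mean(x)."""
    m = ((mean(xs) * mean(ys)) - mean([x * y for x, y in zip(xs, ys)])) / \
        (mean(xs) ** 2 - mean([x * x for x in xs]))
    b = mean(ys) - m * mean(xs)
    return m, b

def coefficient_of_determination_sketch(ys, line):
    """R^2 = 1 - SE(regression line) / SE(mean line)."""
    y_mean = mean(ys)
    se_line = sum((y - f) ** 2 for y, f in zip(ys, line))
    se_mean = sum((y - y_mean) ** 2 for y in ys)
    return 1 - se_line / se_mean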
from reading_data import reading_files, reading_files_test
from create_dataset import create_dataset
from UMLS_methods import *
from tqdm import tqdm
import pandas as pd
import math

# load training data
train_data, CUI, iCUI = reading_files("./train")
train_df = create_dataset(train_data)

# load test data
test_data, _ = reading_files_test("./testing")
test_df = create_dataset(test_data)


def fill_test_data(test_df, iCUI, num):
    test_df.at[:, 'prediction'] = None
    test_df.at[:, 'prediction_source'] = None
    test_df.at[:, 'prediction_name'] = None
    for i in tqdm(test_df.index, desc=str(num)):
        tgt = get_tgt()
        st = get_st(tgt)
        mention = test_df.loc[i]['mention']
        if mention in iCUI:  # iterating train data
            if 'CUI-less' in iCUI[mention]:
                test_df.at[i, 'prediction'] = 'CUI-less'
                test_df.at[i, 'prediction_source'] = 'train_data'
            elif len(iCUI[mention]) == 1:
                test_df.at[i, 'prediction'] = list(iCUI[mention])[0]
                test_df.at[i, 'prediction_source'] = 'train_data'
parser.add_argument('-v', type=int, default=0, metavar='N',
                    help='Verbosity (0 = all information, else = nothing).')
parser.add_argument('-n', type=int, default=18, metavar='N',
                    help='Number of qubits.')
parser.add_argument('--result', type=str, default='result/', metavar='result/',
                    help='Directory for output files.')
parser.add_argument('--pretrained', type=str, default=False, metavar='False',
                    help='Load pretrained model.')
parser.add_argument('--param', type=str, default='param/parameters.json',
                    metavar='param/param.json', help='Parameter file path.')
args = parser.parse_args()

# Read the parameter JSON file and convert it into a Python dictionary
with open(args.param) as f:
    parameters = json.loads(f.read())

# Create the dataset if it is not available locally (only takes a minute or three)
if not os.path.exists('data/easy_dataset.npz') or not os.path.exists('data/hard_dataset.npz') \
        or not os.path.exists('data/random_dataset.npz'):
    print("Creating dataset, please wait one moment.")
    create_dataset(n_qubits=args.n)
else:
    print("Dataset found.")

# Load and plot fidelities, or train from scratch
for state in ['easy', 'random', 'hard']:
    fs = []
    for i in range(1, 6):
        m = None
        if args.pretrained == 'True':
            m = Model(parameters, verbosity=args.v, state=state, n_qubits=args.n,
                      n_layers=i, load=f"results/saved_model_{state}_L{i}")
        else:
            m = Model(parameters, verbosity=args.v, state=state, n_qubits=args.n, n_layers=i)
        fs.append(m.fidelity)
    m.plot_fidelities(fs, state=state)
from reading_data import reading_files, reading_files_test
from create_dataset import create_dataset
from UMLS_methods import *
from tqdm import tqdm
import pandas as pd
import math

# load training data
train_data, CUI, iCUI = reading_files("./train")
train_df = create_dataset(train_data)


def fill_train_data(df, num):
    df.at[:, 'prediction'] = None
    df.at[:, 'prediction_source'] = None
    df.at[:, 'prediction_name'] = None
    for i in tqdm(df.index, desc=str(num)):
        tgt = get_tgt()
        st = get_st(tgt)
        mention = df.loc[i]['mention']
        CUIs = find_mention_in_UMLS_partial_name(mention, st)
        if len(CUIs) >= 1 and CUIs[0]['cui'] != 'NONE':
            if len(CUIs) == 1:
                df.at[i, 'prediction'] = CUIs[0]['cui']
                df.at[i, 'prediction_name'] = CUIs[0]['name']
                df.at[i, 'prediction_source'] = 'UMLS_partial'
            else:
                df.at[i, 'prediction'] = [
                    CUIs[_]['cui'] for _ in range(len(CUIs))
                ]
                df.at[i, 'prediction_name'] = [
from create_dataset import create_dataset
from rnn import lstm
from fastNLP import Trainer
from fastNLP import CrossEntropyLoss
from fastNLP import AccuracyMetric

vocab, train_data, dev_data, test_data = create_dataset()

model = lstm(vocab_size=len(vocab), embedding_length=200, hidden_size=128, output_size=20)
model.cuda()

loss = CrossEntropyLoss(pred='pred', target='target')
metrics = AccuracyMetric(pred='pred', target='target')

trainer = Trainer(model=model,
                  train_data=train_data,
                  dev_data=dev_data,
                  loss=loss,
                  metrics=metrics,
                  save_path='./',
                  device=0,
                  n_epochs=20)
trainer.train()
    print("class weights")
    print(class_weights_dict)
    return class_weights_dict


utils_files_dir = 'util_files'
batch_size = 16

if not os.path.exists(utils_files_dir):
    os.mkdir(utils_files_dir)

if not os.path.exists(os.path.join(utils_files_dir, "train.json")):
    logger.info("creating and preparing data for training and testing.")
    create_dataset()

class_weights = get_class_weights()

if not os.path.exists(os.path.join(utils_files_dir, "model.h5")):
    logger.info("creating model.")
    model = get_model()
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    train = dict(json.load(open('util_files/train.json')))
    train_data = train['data']
    len_training_data = len(train_data)

    valid = dict(json.load(open('util_files/valid.json')))
ITER_PRUNING = args.iter_pruning    # Number of pruning iterations
BASELINE_ACC = args.baseline_acc    # Accuracy baseline, stop pruning iterations if testAcc is lower than it
LR = args.lr                        # Learning Rate
BATCH_SIZE = args.batch_size
EPOCHS = args.epochs                # Early stopping activated, EPOCHS can be high

# Misc parameters
FRAC_DATA = args.frac_data          # Take {FRAC_DATA}% of the dataset
DATA_AUGMENT = args.da_mode         # 'cutmix' or 'random_crop' or 'mixup' or ''
RANDOM_STATE = 17

random.seed(RANDOM_STATE)

### Data processing ###
dataloaders_length = create_dataset(batch_size=BATCH_SIZE,
                                    frac_data=FRAC_DATA,
                                    random_state=RANDOM_STATE,
                                    data_augment=DATA_AUGMENT)

trainloader = torch.load(f'{DATA_PATH}train_data.pt')
validationloader = torch.load(f'{DATA_PATH}val_data.pt')
testloader = torch.load(f'{DATA_PATH}test_data.pt')

dataloaders = {
    "train": trainloader,
    "val": validationloader,
    "test": testloader
}
dataset_sizes = {
    "train": dataloaders_length[0],
    "val": dataloaders_length[1],
def _main():
    parser = _create_parser()
    args = parser.parse_args()
    set_names = ["train", "valid", "test"]
    if args.only_train:
        set_names = [set_names[0]]
    if args.only_set is not None:
        set_names = [args.only_set]
    input_paths = [
        os.path.join(args.rotowire_dir, f + ".json") for f in set_names
    ]

    if args.activity == _extract_activity_descr:
        output_paths, all_named_entities, cell_dict_overall, max_table_length = _prepare_for_extract(
            args, set_names)
    elif args.activity == _create_dataset_descr:
        input_paths, output_paths, total_vocab, max_table_length, \
            max_summary_length, max_plan_length = create_prepare(args, set_names, input_paths)
    elif args.activity == _gather_stats_descr:
        output_paths = set_names

    logger = Logger(log=args.log)
    train_dict = None

    for input_path, output_path in zip(input_paths, output_paths):
        if args.activity == _extract_activity_descr:
            print(f"working with {input_path}, extracting to {output_path}")
            mtl = extract_summaries_from_json(
                input_path,
                output_path,
                logger,
                transform_player_names=args.transform_players,
                prepare_for_bpe_training=args.prepare_for_bpe_training,
                prepare_for_bpe_application=args.prepare_for_bpe_application,
                exception_cities=args.exception_cities,
                exception_teams=args.exception_teams,
                lowercase=args.lowercase,
                words_limit=args.words_limit,
                all_named_entities=all_named_entities,
                cell_dict_overall=cell_dict_overall)
            if mtl > max_table_length:
                max_table_length = mtl
        elif args.activity == _gather_stats_descr:
            print(f"working with {input_path}")
            if os.path.basename(input_path) == "train.json":
                train_dict = gather_json_stats(
                    input_path, logger, transform_player_names=args.transform_players)
                if args.five_occurrences:
                    train_dict = train_dict.sort(prun_occurrences=5)
            else:
                gather_json_stats(
                    input_path, logger, train_dict,
                    transform_player_names=args.transform_players)
        elif args.activity == _create_dataset_descr:
            create_dataset(input_path,
                           output_path,
                           total_vocab,
                           max_plan_length=max_plan_length,
                           max_summary_length=max_summary_length,
                           max_table_length=max_table_length,
                           logger=logger)

    if args.activity == _extract_activity_descr and args.entity_vocab_path is not None:
        all_named_entities.sort().save(args.entity_vocab_path)
    if args.activity == _extract_activity_descr and args.cell_vocab_path is not None:
        cell_dict_overall.sort().save(args.cell_vocab_path)
    if args.activity == _extract_activity_descr and args.config_path is not None:
        with open(args.config_path, "w") as f:
            print(max_table_length, file=f)
from multiclass_3D_CNN import buildModel
from create_dataset import create_dataset
# from create_dataset_copy import create_dataset

config = tf.ConfigProto()
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))

if __name__ == '__main__':
    print('begin creating the input dataset')
    data_dir = '/home/d1274/no_backup/d1274/data'
    datagen = create_dataset(data_dir)

    # The image shape is (236, 320, 260)
    # model = Resnet3DBuilder.build_resnet_50((64, 64, 64, 1), 3)
    model = buildModel((64, 64, 64, 1), 3)
    print(model.summary())
    model.compile(loss="categorical_crossentropy",
                  optimizer="sgd",
                  metrics=['acc'])

    model_dir = os.path.join('/home/d1274/no_backup/d1274/model', "IQA_test_with_tfrecord")
    os.makedirs(model_dir, exist_ok=True)
    print("model_dir: ", model_dir)

    est_iqa = tf.keras.estimator.model_to_estimator(keras_model=model,
           marker='o', s=4)
ax.scatter([a[indeces_plot[0]], b[indeces_plot[0]]],
           [a[indeces_plot[1]], b[indeces_plot[1]]],
           marker='*', s=100)

plt.figure()
fig = plt.gcf()
ax = fig.gca()
ax.scatter(data_sample[indeces_plot[2], :],
           data_sample[indeces_plot[3], :],
           marker='o', s=4)
ax.scatter([a[indeces_plot[2]], b[indeces_plot[2]]],
           [a[indeces_plot[3]], b[indeces_plot[3]]],
           marker='*', s=100)

sampler_global = region_sampler()
generate_data_from_coefs(file_name, initial_coef, sampler_global,
                         assign_region, size_dataset, n_parameters)

file_name = create_dataset(n_parameters, assign_region, n_parameter_region,
                           size_dataset, file_name=file_name,
                           initial_coef=initial_coef)

stophere  # NOTE: undefined name; execution stops here with a NameError