Example #1
def test_inference(model, test_data_path):
    ds_test = create_dataset(test_data_path).create_dict_iterator()
    data = next(ds_test)
    images = data["image"].asnumpy()
    labels = data["label"].asnumpy()

    output = model.predict(Tensor(data["image"]))
    pred = numpy.argmax(output.asnumpy(), axis=1)
    all_correct = True
    for i in range(len(labels)):
        plt.subplot(4, 8, i + 1)
        color = 'blue' if pred[i] == labels[i] else 'red'
        plt.title("pre:{}".format(pred[i]), color=color)
        plt.imshow(numpy.squeeze(images[i]))
        plt.axis("off")
        if color == 'red':
            all_correct = False
            print(
                "Row {}, column {} is incorrectly identified as {}, the correct value should be {}"
                .format(i // 8 + 1, i % 8 + 1, pred[i], labels[i]), '\n')
    if all_correct:
        print("All the figures in this group are predicted correctly")
    print(pred, "<-- predicted digits")
    print(labels, "<-- ground-truth digits")
    plt.show()
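
A minimal sketch of how this helper might be invoked; the LeNet5 network, checkpoint path and MNIST layout are taken from the related MindSpore quick-start examples further down this page, not from this snippet itself:

# assumed context: LeNet5, Model, load_checkpoint and load_param_into_net come from
# the MindSpore quick-start shown in the other examples on this page
network = LeNet5()
param_dict = load_checkpoint("./model/ckpt/mindspore_quick_start/checkpoint_lenet-1_1874.ckpt")
load_param_into_net(network, param_dict)
test_inference(Model(network), "./MNIST/test")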
def make_ds(Filenames, context, zero_delimiter=False):
    X, Y = None, None

    if zero_delimiter:
        xdel = np.zeros((1, context, 257))
        ydel = np.zeros((1, context, 257 * 5))
    for fp in Filenames:

        real_wiener_all, wiener_all = readmat(fp, read='inout', keys=('Input', 'Output'))

        dslength = len(wiener_all)

        X_ = np.zeros((dslength, 1, 257 * 5))
        Y_ = np.zeros((dslength, 1, 257))

        for i in range(np.size(X_, 0)):
            x = wiener_all[i].reshape((1, 257 * 5))
            # x = np.array([max(x[0][i],-35) for i in range (1285)])
            X_[i][0] = x

        for i in range(np.size(Y_, 0)):
            y = real_wiener_all[i].reshape((1, 257))
            # y = np.array([max(y[0][i], -35) for i in range(257)])
            Y_[i][0] = y

        if context:
            X_, Y_ = create_dataset(X_, Y_, context)

            X_ = np.squeeze(X_)
            Y_ = np.squeeze(Y_)

            if zero_delimiter:
                newX_ = np.zeros((len(X_) + 1, context, 257 * 5))
                newY_ = np.zeros((len(Y_) + 1, context, 257))

                newX_[1:len(X_) + 1, :, :] = X_
                newY_[1:len(Y_) + 1, :, :] = Y_
                X_ = newX_
                Y_ = newY_

        if X is None:
            X = X_
            Y = Y_
        else:
            X = np.concatenate((X, X_))
            Y = np.concatenate((Y, Y_))
            print(X.shape)

    return X, Y
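
A usage sketch for make_ds; the .mat file names and the context length are hypothetical, and readmat/create_dataset are helpers from the same project:

# hypothetical inputs, shown only to illustrate the expected call signature
X, Y = make_ds(["clip_01.mat", "clip_02.mat"], context=5, zero_delimiter=True)
print(X.shape, Y.shape)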
Example #3
def test_net(network, model, mnist_path):
    """Define the evaluation method"""

    print("==================== Starting Testing ===============")
    param_dict = load_checkpoint(
        "./model/ckpt/mindspore_quick_start/checkpoint_lenet-1_1874.ckpt")
    load_param_into_net(network, param_dict)
    ds_eval = create_dataset(os.path.join(mnist_path, "test"))
    acc = model.eval(ds_eval, dataset_sink_mode=False)
    print("==================== Accuracy:{} ===============".format(acc))
def create_datasets():
    X_train, Y_train, X_test, Y_test = cr.create_dataset()

    Y_train = Y_train.reshape(-1, 3, 32, 32)
    X_train = X_train.reshape(-1, 3, 32, 32)

    Y_test = Y_test.reshape(-1, 3, 32, 32)
    X_test = X_test.reshape(-1, 3, 32, 32)

    return X_train.astype(np.float32), Y_train.astype(np.float32), X_test.astype(np.float32), Y_test.astype(np.float32)
Example #5
def train_and_eval_crf(data_file, save_file, save=True):
    '''Train and evaluate a CRF model. Option to save F1 Score.'''
    x, y = create_dataset(data_file, "CRF")
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)
    crf = train_crf(x_train, y_train)
    f1 = eval_crf(crf, x_test, y_test)
    if save:
        desc = "Simple CRF Model"
        note = "None"
        save_f1(save_file, f1, len(x), desc, note)
    return
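
A possible invocation, with hypothetical file paths (create_dataset, train_crf, eval_crf and save_f1 are project-specific helpers):

train_and_eval_crf("annotated_corpus.txt", "f1_scores.csv", save=True)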
Example #6
    def __init__(self, dataset_name="dataset_hrl.pk"):
        # Dataset = create_dataset()
        # pickle.dump(Dataset, open("dataset_hrl.pk", "wb"))
        import pickle
        if os.path.exists(dataset_name):
            with open(dataset_name, "rb") as f:
                self.data = pickle.load(f)
        else:
            from create_dataset import create_dataset
            self.data = create_dataset()
            with open(dataset_name, "wb") as f:
                pickle.dump(self.data, f)

        self.index = 0
        self.dataset_size = len(self.data)
Example #7
def train_and_eval_BiLSTM(data_file, save_file, save=True, embed_size=100, epochs=50, batch_size=32, val_size=0.1):
    '''Train and evaluate a Keras BiLSTM Model. Option to save F1 Score.'''
    x, y, word_ids, tag_ids = create_dataset(data_file, "LSTM")
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)
    custom_emb = Word2Vec.load("./data/word2vec_numeric_encs.model")
    embed_matrix, _ = create_weight_matrix(word_ids, custom_emb)
    model = create_BiLSTM(len(word_ids), len(tag_ids), embed_size, 50, embed_matrix)
    history = train_BiLSTM(model, x_train, y_train, batch_size, epochs, val_size)
    f1 = eval_BiLSTM(model, x_test, y_test, tag_ids)
    if save:
        desc = f"BiLSTM-EmbedSize-{embed_size}-Word2Vec"
        note = "Word2Vec Embeddings"
        save_f1(save_file, f1, len(x), desc, note)
    return
Example #8
    def __init__(self, filenames):
        """
        Constructor: create a sequenceModel object and
        identify the number of records in the dataset.
        """
        print("\n", "+" * 10, filenames, "+" * 10, "\n")
        tf.reset_default_graph()
        with tf.name_scope('seqModel'):
            # get number of records in the dataset
            # self.num_records = sum(1 for _ in tf.data.TFRecordDataset(filenames))
            # tf.disable_eager_execution()
            # create a dataset iterator to pass to the model
            next_element, dataset_init_op = create_dataset(filenames)
            # use the iterator generator to create data variables
            self.createDataIterators(next_element)
            self.createParams()
            self.dataset_init_op = dataset_init_op
        self.sess = tf.Session()
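
A short usage sketch, assuming create_dataset returns a TF1-style initializable iterator whose init op must be run before pulling batches; the class name sequenceModel is taken from the docstring and the file name is hypothetical:

sm = sequenceModel(["train.tfrecord"])
sm.sess.run(sm.dataset_init_op)  # (re)initialize the dataset iterator before an epoch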
Example #9
def create_model_metadata():
    aid = create_architecture.create_architecture()
    did = create_dataset.create_dataset()

    url = 'http://localhost:8080/api/v1/model'

    model = {
        "is_public": True,
        "title": "Test Model {id}".format(id=int(time.time()) % 1000),
        "description": "Test test test",
        "labels": ["a", "b", "c"],
        "architecture": aid,
        "dataset": did
    }

    resp = utils.post(url, json=model)

    print('status:', resp.status_code, 'data:', resp.text)

    if resp.status_code == 200:
        return resp.json()['id']
    def prepare(self):
        df = self.get_gcp_dataframe()
        dict_of_image_names_with_its_gcp_cordinates = {}
        pickle_exist = False
        dict_pickle_path = os.path.join('pickle', 'dict.pickle')
        if os.path.exists(dict_pickle_path):
            with open(dict_pickle_path, 'rb') as f:
                dict_of_image_names_with_its_gcp_cordinates = pickle.load(f)
                pickle_exist = True
        for index, row in enumerate(df.iterrows()):
            if pickle_exist:
                break
            print(index)
            image_name: str = df['FileName'].iloc[index]
            gcp_location: str = df['GCPLocation'].iloc[index]
            print(gcp_location, image_name)
            path = os.path.join(self.data_set_path, image_name)
            if not os.path.exists(path):
                with open('not_found.txt', 'a') as the_file:
                    the_file.write(path + '\n')
                continue
            dict_of_image_names_with_its_gcp_cordinates = crop_image(
                path, gcp_location,
                dict_of_image_names_with_its_gcp_cordinates)

            with open(dict_pickle_path, 'wb') as f:
                pickle.dump(dict_of_image_names_with_its_gcp_cordinates, f,
                            pickle.HIGHEST_PROTOCOL)

        data_set, target_set = create_dataset(
            dict_of_image_names_with_its_gcp_cordinates)

        print(target_set[0])
        print(data_set.shape)

        print("loading Model")
        model = ModelClass()
        model.create_model()
        model.train_model(data_set, target_set)
Example #11
import config
import create_dataset

import numpy as np
import pandas as pd

import MODELS

from keras.callbacks import EarlyStopping, ModelCheckpoint

train_dir = config.dir.train_dir

height = config.standard_vals.height
width = config.standard_vals.width
channels = config.standard_vals.channels

(X_train, X_val, y_train,
 y_val) = create_dataset.create_dataset(train_dir, height, width, channels)

# creating one hot encoded labels
targets_series = pd.Series(y_train)
one_hot = pd.get_dummies(targets_series, sparse=True)
y_train = np.asarray(one_hot)

targets_series = pd.Series(y_val)
one_hot = pd.get_dummies(targets_series, sparse=True)
y_val = np.asarray(one_hot)

#model = MODELS.inceptionv3(height, width, channels)
model = MODELS.simple_model(height, width, channels)

early_stop = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
checkpointer = ModelCheckpoint(filepath='cnnbest.hdf5',
Example #12
    - direction would just be south, east etc. with numbers 0-3 maybe.
    - randomly skip some squares to get test set?
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str)
    parser.add_argument('--phase', type=str, default="train")

    args = parser.parse_args()
    model_name = args.model
    phase = args.phase

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    amount = 30
    if model_name in ['viewpool', 'multiple']:
        image_datasets = create_dataset.create_dataset(multiple=True,
                                                       amount=amount)
    else:
        image_datasets = create_dataset.create_dataset(amount=amount)

    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=8,
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train', 'val', 'test']
    }

    dataset_sizes = {
        x: len(image_datasets[x])
        for x in ['train', 'val', 'test']
    }
Example #13
def create_d():
    Chromosome().replace_exon()
    os.system('./grab_sequence.sh')
    create_dataset('train', 'all')
Example #14
    # create the network
    network = LeNet5()

    # define the optimizer
    net_opt = nn.Momentum(network.trainable_params(), lr, momentum)

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    epoch_size = 5
    mnist_path = "./MNIST/"
    model_path = "./model/ckpt/mindspore_quick_start/"

    repeat_size = 1
    ds_train = create_dataset(os.path.join(mnist_path, "train"), 32,
                              repeat_size)
    ds_eval = create_dataset(os.path.join(mnist_path, "test"), 32)

    # clean up model files left over from previous runs (Linux shell command)
    os.system('rm -rf {0}*.ckpt {0}*.meta {0}*.pb'.format(model_path))

    # define the model
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    # save the network model and parameters for subsequent fine-tuning
    config_ck = CheckpointConfig(save_checkpoint_steps=375,
                                 keep_checkpoint_max=16)

    # save checkpoints of the network during training
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
                                 directory=model_path,
Example #15
import torch
import models
import create_dataset


if __name__ == '__main__': 
    device = 'cuda:0'

    model = models.create_basic_model(device)
    model.load_state_dict(torch.load("models/basic_model.pt"))
    model.to(device)
    model.eval()

    image_datasets = create_dataset.create_dataset(False, 20)

    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=1,
                                                shuffle=True, num_workers=1)
                for x in ['train', 'val', 'test']}

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

    preds = []
    gts = []

    for im, trgt in dataloaders['test']:
        pred = torch.argmax(model(im.to(device)))

        preds.append(pred.to('cpu'))
        gts.append(trgt)

    preds = torch.Tensor(preds)
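
A possible continuation of the excerpt above that turns the collected predictions into a test accuracy (not part of the original snippet):

correct = sum(int(p) == int(g) for p, g in zip(preds, gts))
print("test accuracy:", correct / len(gts))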
def cnn_categorization(model_type="base",
                       data_path="image_categorization_dataset.pt",
                       contrast_normalization=False,
                       whiten=False):
    """
    Invokes the dataset creation, the model construction and training functions

    Arguments
    --------
    model_type: (string), the type of model to train. Use 'base' for the base model and 'improved' for the improved model. Default: base
    data_path: (string), the path to the dataset. This argument will be passed to the dataset creation function
    contrast_normalization: (boolean), specifies whether or not to do contrast normalization
    whiten: (boolean), specifies whether or not to whiten the data.

    """
    # Do not change the output path
    # but you can comment out the exp_dir if you do not want to save the model checkpoints
    output_path = "{}_image_categorization_dataset.pt".format(model_type)
    exp_dir = "./{}_models".format(model_type)

    train_ds, val_ds = create_dataset(data_path, output_path,
                                      contrast_normalization, whiten)

    # specify the network architecture and the training policy of the models under
    # the respective blocks
    if model_type == "base":
        # create netspec_opts
        netspec_opts = {
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 8, 1],
            "num_filters": [16, 16, 0, 32, 32, 0, 64, 64, 0, 0, 16],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 1, 1],
            "layer_type": [
                "conv", "bn", "relu", "conv", "bn", "relu", "conv", "bn",
                "relu", "pool", "conv"
            ]
        }
        # create train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.0001,
            "batch_size": 128,
            "momentum": 0.9,
            "num_epochs": 25,
            "step_size": 20,
            "gamma": 0.1
        }
        # create the base model from netspec_opts
        model = cnn_categorization_base(netspec_opts)

    elif model_type == "improved":
        # create netspec_opts
        netspec_opts = {
            "kernel_size": [3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 8, 1],
            "num_filters": [32, 32, 0, 0, 64, 64, 0, 0, 128, 128, 0, 0, 16],
            "stride": [1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 1, 1],
            "layer_type": [
                "conv", "bn", "relu", "pool", "conv", "bn", "relu", "pool",
                "conv", "bn", "relu", "pool", "conv"
            ]
        }
        # create train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.0001,
            "batch_size": 128,
            "momentum": 0.9,
            "num_epochs": 25,
            "step_size": 20,
            "gamma": 0.1
        }
        # create improved model
        model = cnn_categorization_improved(netspec_opts)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # uncomment the line below if you wish to resume training of a saved model
    # model.load_state_dict(load(PATH to state))

    # train the model
    train(model, train_ds, val_ds, train_opts, exp_dir)

    # save model's state and architecture to the base directory
    state_dictionary_path = f"{model_type}_state_dict.pt"
    save(model.state_dict(), state_dictionary_path)
    model = {"state": state_dictionary_path, "specs": netspec_opts}
    save(model, "{}-model.pt".format(model_type))

    plt.savefig(f"{model_type}-categorization.png")
    plt.show()
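
A minimal invocation sketch for the function above; the argument values are illustrative only:

if __name__ == "__main__":
    # train the improved variant with both preprocessing options enabled
    cnn_categorization(model_type="improved",
                       data_path="image_categorization_dataset.pt",
                       contrast_normalization=True,
                       whiten=True)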
from create_dataset import create_dataset
from utils import distrib

train, val = create_dataset('semantic_segmentation_dataset.pt')

classcount, rgbmean = distrib(val)
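
A possible next step, mirroring how a later example on this page turns these statistics into loss weights (this assumes classcount is a tensor of per-class pixel counts):

from torch.nn import CrossEntropyLoss

# inverting the counts gives rare classes larger weights
weights = 1.0 / classcount
criterion = CrossEntropyLoss(weight=weights.float())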

from create_dataset import create_dataset, download_data
import numpy as np

# download_data(num_weeks=100)

dataset = np.loadtxt('dataset.csv', delimiter=',')  # 0~16238

train_dataset = dataset[:-5000]
test_dataset = dataset[-5000:]

trainX, trainY = create_dataset(train_dataset, lookback=24)
testX, testY = create_dataset(test_dataset, lookback=24)

print(trainX.shape)
print(trainY.shape)
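
For reference, a sketch of the typical sliding-window helper this snippet relies on; the actual create_dataset imported above may differ in detail (for example in how the target row is chosen):

def create_dataset_windowed(dataset, lookback=1):
    # stack `lookback` consecutive rows as the input and use the next row as the target
    X, Y = [], []
    for i in range(len(dataset) - lookback):
        X.append(dataset[i:i + lookback])
        Y.append(dataset[i + lookback])
    return np.array(X), np.array(Y)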
Example #19
def semantic_segmentation(model_type="base"):
    """
    sets up and trains a semantic segmentation model

    Arguments
    ---------
    model_type:  (String) a string in {'base', 'improved'} specifying the targeted model type
    """

    # the dataset
    train_dl, val_dl = create_dataset("semantic_segmentation_dataset.pt")

    # an optional export directory
    exp_dir = f"{model_type}_models"

    classcount, rgbmean = distrib(train_dl)

    classcount = 1 / classcount
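    # inverting the per-class pixel counts gives rare classes larger weights; the
    # "improved" branch below passes these weights to CrossEntropyLoss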
    classcount = classcount.to(device)
    if model_type == "base":
        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", "conv_2", "bn_2", "relu_2",
                "conv_3", "bn_3", "relu_3", "conv_4", "bn_4", "relu_4",
                "conv_5", "upsample_4x", "skip_6", "sum_6", "upsample_2x"
            ],
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 1, 4, 1, 0, 4],
            # filter sizes are also filled in for relu and sum entries,
            # since skip layers and other entries use them
            "num_filters": [
                16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128, 128, 36, 36, 36,
                36, 36
            ],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 4, 1, 0, 2],
            "layer_type": [
                'conv', 'bn', 'relu', 'conv', 'bn', 'relu', 'conv', 'bn',
                'relu', 'conv', 'bn', 'relu', 'conv', 'convt', 'skip', 'sum',
                'convt'
            ],
            "input":
            [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, (14, 13), 15],
            "pad": [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
        }
        # specify train_opt
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 34,
            "step_size": 30,
            "gamma": 0.1,
            "objective": CrossEntropyLoss()
        }

        model = SemanticSegmentationBase(netspec_opts)
        model.to(device)

    elif model_type == "improved":

        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", 'pool_1', "conv_2", "bn_2",
                "relu_2", "pool_2", "conv_3", "bn_3", "relu_3", "pool_3",
                "conv_4", "bn_4", "relu_4", "drop_1", "conv_5", "upsample_4x",
                "skip_6", "sum_6", "skip_10", "upsample_skip_10", "sum_10",
                "upsample_2x"
            ],
            "kernel_size": [
                3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 2, 3, 0, 0, 0, 1, 4, 1, 0, 1,
                4, 0, 4
            ],
            # filter sizes are also filled in for relu and sum entries,
            # since skip layers and other entries use them
            "num_filters": [
                128, 128, 128, 128, 256, 256, 256, 256, 512, 512, 512, 512,
                1024, 1024, 1024, 1024, 36, 36, 36, 36, 36, 36, 36, 36
            ],
            "stride": [
                1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 0, 1, 4, 1, 0, 1,
                2, 0, 2
            ],
            "layer_type": [
                'conv', 'bn', 'relu', 'pool', 'conv', 'bn', 'relu', 'pool',
                'conv', 'bn', 'relu', 'pool', 'conv', 'bn', 'relu', 'drop',
                'conv', 'convt', 'skip', 'sum', 'skip', 'convt', 'sum', 'convt'
            ],
            "input": [
                -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                6, (18, 17), 10, 20, (21, 19), 22
            ],
            "pad": [
                1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                1, 0, 1
            ]
        }
        # specify train_opts
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 12,
            "step_size": [7, 10],
            "gamma": 0.1,
            "objective": CrossEntropyLoss(classcount.float())
        }

        model = SemanticSegmentationImproved(netspec_opts)
        model.to(device)
        CNN_model_params = torch.load(
            'improved_state_dict_CNN(128,256,512).pt')
        model_params = model.state_dict().copy()
        #print(model.state_dict().keys())
        #for p in model.named_parameters():
        #print(p)
        #i = 0
        #freezelayers = {0,1,2,3,4,5}
        #for p in model.named_parameters():
        #if i in freezelayers:
        #print(p)
        #i = i+1
        model_params['net.conv_1.weight'] = CNN_model_params['conv0.weight']
        model_params['net.conv_1.bias'] = CNN_model_params['conv0.bias']
        model_params['net.bn_1.weight'] = CNN_model_params['bn1.weight']
        model_params['net.bn_1.bias'] = CNN_model_params['bn1.bias']
        #model_params['net.bn_1.running_mean'] = CNN_model_params[ 'bn1.running_mean']
        #model_params['net.bn_1.running_var'] = CNN_model_params[ 'bn1.running_var']
        #model_params['net.bn_1.num_batches_tracked'] = CNN_model_params['bn1.num_batches_tracked']

        model_params['net.conv_2.weight'] = CNN_model_params['conv4.weight']
        model_params['net.conv_2.bias'] = CNN_model_params['conv4.bias']
        #model_params['net.bn_2.weight'] = CNN_model_params['bn5.weight']
        #model_params['net.bn_2.bias'] = CNN_model_params['bn5.bias']
        #model_params['net.bn_2.running_mean'] = CNN_model_params[ 'bn5.running_mean']
        #model_params['net.bn_2.running_var'] = CNN_model_params[ 'bn5.running_var']
        #model_params['net.bn_2.num_batches_tracked'] = CNN_model_params['bn5.num_batches_tracked']

        #model_params['net.conv_3.weight'] = CNN_model_params['conv8.weight']
        #model_params['net.conv_3.bias'] = CNN_model_params['conv8.bias']
        #model_params['net.bn_3.weight'] = CNN_model_params['bn9.weight']
        #model_params['net.bn_3.bias'] = CNN_model_params['bn9.bias']
        #model_params['net.bn_3.running_mean'] = CNN_model_params[ 'bn9.running_mean']
        #model_params['net.bn_3.running_var'] = CNN_model_params[ 'bn9.running_var']
        #model_params['net.bn_3.num_batches_tracked'] = CNN_model_params['bn9.num_batches_tracked']

        model.load_state_dict(model_params)
        index = 0
        freezelayers = {0, 1, 2, 3, 4, 5}
        for p in model.parameters():
            if index in freezelayers:
                p.requires_grad = False
            index += 1
        #for p in model.named_parameters():
        #print(p)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # train the model
    train(model, train_dl, val_dl, train_opts, exp_dir=exp_dir)

    # save model's state and architecture to the base directory
    model = {"state": model.state_dict(), "specs": netspec_opts}
    save(model, f"{model_type}_semantic-model.pt")

    plt.savefig(f"{model_type}_semantic.png")
    plt.show()
def semantic_segmentation(model_type="base"):
    """
    sets up and trains a semantic segmentation model

    Arguments
    ---------
    model_type:  (String) a string in {'base', 'improved'} specifying the targeted model type
    """

    # the dataset
    train_dl, val_dl = create_dataset("semantic_segmentation_dataset.pt")

    # an optional export directory
    exp_dir = f"{model_type}_models"

    if model_type == "base":
        # specify netspec_opts
        netspec_opts = {
            "name": [
                "conv_1", "bn_1", "relu_1", "conv_2", "bn_2", "relu_2",
                "conv_3", "bn_3", "relu_3", "conv_4", "bn_4", "relu_4",
                "conv_5", "upsample_4x", "skip_6", "sum_6", "upsample_2x"
            ],
            "kernel_size": [3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 1, 4, 1, 0, 4],
            # filter sizes are also filled in for relu and sum entries,
            # since skip layers and other entries use them
            "num_filters": [
                16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128, 128, 36, 36, 36,
                36, 36
            ],
            "stride": [1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 4, 1, 0, 2],
            "layer_type": [
                'conv', 'bn', 'relu', 'conv', 'bn', 'relu', 'conv', 'bn',
                'relu', 'conv', 'bn', 'relu', 'conv', 'convt', 'skip', 'sum',
                'convt'
            ],
            "input":
            [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, (14, 13), 15],
            "pad": [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]
        }
        # specify train_opt
        train_opts = {
            "lr": 0.1,
            "weight_decay": 0.001,
            "batch_size": 24,
            "momentum": 0.9,
            "num_epochs": 34,
            "step_size": 30,
            "gamma": 0.1,
            "objective": CrossEntropyLoss()
        }

        model = SemanticSegmentationBase(netspec_opts)

    # elif model_type == "improved":

    # specify netspec_opts

    # specify train_opts

    # model = SemanticSegmentationImproved(netspec_opts)
    else:
        raise ValueError(f"Error: unknown model type {model_type}")

    # train the model
    train(model, train_dl, val_dl, train_opts, exp_dir=exp_dir)

    # save model's state and architecture to the base directory
    model = {"state": model.state_dict(), "specs": netspec_opts}
    save(model, f"{model_type}_semantic-model.pt")

    plt.savefig(f"{model_type}_semantic.png")
    plt.show()
import matplotlib.pyplot as plt
from create_dataset import create_dataset
from best_fit_slope_y_intercept import best_fit_slope_y_intercept
from coefficient_of_determination import coefficient_of_determination

# create a random dataset
xs, ys = create_dataset(50, 30, correlation='pos')

# calculating m and b as per the algorithm
m, b = best_fit_slope_y_intercept(xs, ys)

# build the regression line by applying y = m*x + b to every x
regression_line = [(m * x) + b for x in xs]

# predicting a value
x_predict = 55
y_predict = m * x_predict + b
print(y_predict)

# calculating the accuracy
r_squared = coefficient_of_determination(ys, regression_line)
print(r_squared)

# plotting the points and regression line
plt.scatter(xs, ys, marker='.')
plt.plot(xs, regression_line)
plt.scatter(x_predict, y_predict, color='red', marker='x')
plt.show()
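
For context, one common closed-form implementation of the imported slope/intercept helper; the actual best_fit_slope_y_intercept module may differ (xs and ys are assumed to be NumPy arrays):

import numpy as np

def best_fit_slope_y_intercept_sketch(xs, ys):
    # classic least-squares fit of a line y = m*x + b
    m = (xs.mean() * ys.mean() - (xs * ys).mean()) / (xs.mean() ** 2 - (xs ** 2).mean())
    b = ys.mean() - m * xs.mean()
    return m, b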
from reading_data import reading_files, reading_files_test
from create_dataset import create_dataset
from UMLS_methods import *
from tqdm import tqdm
import pandas as pd
import math

# load training data
train_data, CUI, iCUI = reading_files("./train")
train_df = create_dataset(train_data)

# load test data
test_data, _ = reading_files_test("./testing")
test_df = create_dataset(test_data)


def fill_test_data(test_df, iCUI, num):
    test_df.at[:, 'prediction'] = None
    test_df.at[:, 'prediction_source'] = None
    test_df.at[:, 'prediction_name'] = None
    for i in tqdm(test_df.index, desc=str(num)):
        tgt = get_tgt()
        st = get_st(tgt)
        mention = test_df.loc[i]['mention']
        if mention in iCUI:  # iterating train data
            if 'CUI-less' in iCUI[mention]:
                test_df.at[i, 'prediction'] = 'CUI-less'
                test_df.at[i, 'prediction_source'] = 'train_data'
            elif len(iCUI[mention]) == 1:
                test_df.at[i, 'prediction'] = list(iCUI[mention])[0]
                test_df.at[i, 'prediction_source'] = 'train_data'
    parser.add_argument('-v', type=int, default=0, metavar='N', help='Verbosity (0 = all information, else = nothing).')
    parser.add_argument('-n', type=int, default=18, metavar='N', help='Number of qubits.')
    parser.add_argument('--result', type=str, default='result/', metavar='result/', help='Directory for output files.')
    parser.add_argument('--pretrained', type=str, default=False, metavar='False', help='Load pretrained model.')
    parser.add_argument('--param', type=str, default='param/parameters.json', metavar='param/param.json',
                        help='Parameter file path.')
    args = parser.parse_args()

    # Read parameter JSON file, convert it into a Python dictionary
    with open(args.param) as f:
        parameters = json.loads(f.read())

    # Create dataset if not available locally (only takes a minute or three)
    if not os.path.exists('data/easy_dataset.npz') or not os.path.exists('data/hard_dataset.npz') or not os.path.exists('data/random_dataset.npz'):
        print("Creating dataset, please wait one moment.")
        create_dataset(n_qubits=args.n)
    else:
        print("Dataset found.")

    # Load and plot fidelities or training
    for state in ['easy', 'random', 'hard']:
        fs = []
        for i in range(1, 6):
            m = None
            if args.pretrained == 'True':
                m = Model(parameters, verbosity = args.v, state=state, n_qubits=args.n, n_layers=i, load=f"results/saved_model_{state}_L{i}")
            else:
                m = Model(parameters, verbosity = args.v, state=state, n_qubits=args.n, n_layers=i)
            fs.append(m.fidelity)
        m.plot_fidelities(fs, state=state)
Example #24
from reading_data import reading_files, reading_files_test
from create_dataset import create_dataset
from UMLS_methods import *
from tqdm import tqdm
import pandas as pd
import math

# load training data
train_data, CUI, iCUI = reading_files("./train")
train_df = create_dataset(train_data)


def fill_train_data(df, num):
    df.at[:, 'prediction'] = None
    df.at[:, 'prediction_source'] = None
    df.at[:, 'prediction_name'] = None
    for i in tqdm(df.index, desc=str(num)):
        tgt = get_tgt()
        st = get_st(tgt)
        mention = df.loc[i]['mention']
        CUIs = find_mention_in_UMLS_partial_name(mention, st)
        if len(CUIs) >= 1 and CUIs[0]['cui'] != 'NONE':
            if len(CUIs) == 1:
                df.at[i, 'prediction'] = CUIs[0]['cui']
                df.at[i, 'prediction_name'] = CUIs[0]['name']
                df.at[i, 'prediction_source'] = 'UMLS_partial'
            else:
                df.at[i, 'prediction'] = [
                    CUIs[_]['cui'] for _ in range(len(CUIs))
                ]
                df.at[i, 'prediction_name'] = [
Example #25
from create_dataset import create_dataset

from rnn import lstm
from fastNLP import Trainer
from fastNLP import CrossEntropyLoss
from fastNLP import AccuracyMetric

vocab, train_data, dev_data, test_data = create_dataset()

model = lstm(vocab_size=len(vocab),
             embedding_length=200,
             hidden_size=128,
             output_size=20)
model.cuda()

loss = CrossEntropyLoss(pred='pred', target='target')
metrics = AccuracyMetric(pred='pred', target='target')

trainer = Trainer(model=model,
                  train_data=train_data,
                  dev_data=dev_data,
                  loss=loss,
                  metrics=metrics,
                  save_path='./',
                  device=0,
                  n_epochs=20)
trainer.train()
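
After training, the held-out split could be scored with fastNLP's Tester; this is a sketch that assumes the same pred/target conventions as the Trainer above:

from fastNLP import Tester

tester = Tester(data=test_data, model=model, metrics=metrics)
tester.test()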
    print "class weights"
    print class_weights_dict

    return class_weights_dict


utils_files_dir = 'util_files'
batch_size = 16

if not os.path.exists(utils_files_dir):
    os.mkdir(utils_files_dir)

if not os.path.exists(os.path.join(utils_files_dir, "train.json")):
    logger.info("creating and preparing data for training and testing.")
    create_dataset()

class_weights = get_class_weights()

if not os.path.exists(os.path.join(utils_files_dir, "model.h5")):
    logger.info("creating model.")
    model = get_model()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])

    train = dict(json.load(open('util_files/train.json')))
    train_data = train['data']
    len_training_data = len(train_data)

    valid = dict(json.load(open('util_files/valid.json')))
Example #27
    ITER_PRUNING = args.iter_pruning  # Number of pruning iterations
    BASELINE_ACC = args.baseline_acc  # Accuracy baseline, stop pruning iterations if testAcc is lower than it

    LR = args.lr  # Learning Rate
    BATCH_SIZE = args.batch_size
    EPOCHS = args.epochs  # Early stopping activated, EPOCHS can be high

    # Misc parameters
    FRAC_DATA = args.frac_data  # Take {FRAC_DATA}% of the dataset
    DATA_AUGMENT = args.da_mode  # 'cutmix' or 'random_crop' or 'mixup' or ''
    RANDOM_STATE = 17
    random.seed(RANDOM_STATE)

    ### Data processing ###
    dataloaders_length = create_dataset(batch_size=BATCH_SIZE,
                                        frac_data=FRAC_DATA,
                                        random_state=RANDOM_STATE,
                                        data_augment=DATA_AUGMENT)
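    # dataloaders_length above holds the train/val/test sizes; the DataLoaders themselves
    # appear to be serialized to {DATA_PATH}*.pt by create_dataset and are reloaded below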

    trainloader = torch.load(f'{DATA_PATH}train_data.pt')
    validationloader = torch.load(f'{DATA_PATH}val_data.pt')
    testloader = torch.load(f'{DATA_PATH}test_data.pt')

    dataloaders = {
        "train": trainloader,
        "val": validationloader,
        "test": testloader
    }

    dataset_sizes = {
        "train": dataloaders_length[0],
        "val": dataloaders_length[1],
Example #28
def _main():
    parser = _create_parser()
    args = parser.parse_args()
    set_names = ["train", "valid", "test"]
    if args.only_train:
        set_names = [set_names[0]]
    if args.only_set is not None:
        set_names = [args.only_set]
    input_paths = [
        os.path.join(args.rotowire_dir, f + ".json") for f in set_names
    ]

    if args.activity == _extract_activity_descr:
        output_paths, all_named_entities, cell_dict_overall, max_table_length = _prepare_for_extract(
            args, set_names)
    elif args.activity == _create_dataset_descr:
        input_paths, output_paths, total_vocab, max_table_length, \
            max_summary_length, max_plan_length = create_prepare(args, set_names, input_paths)
    elif args.activity == _gather_stats_descr:
        output_paths = set_names

    logger = Logger(log=args.log)
    train_dict = None

    for input_path, output_path in zip(input_paths, output_paths):
        if args.activity == _extract_activity_descr:
            print(f"working with {input_path}, extracting to {output_path}")
            mtl = extract_summaries_from_json(
                input_path,
                output_path,
                logger,
                transform_player_names=args.transform_players,
                prepare_for_bpe_training=args.prepare_for_bpe_training,
                prepare_for_bpe_application=args.prepare_for_bpe_application,
                exception_cities=args.exception_cities,
                exception_teams=args.exception_teams,
                lowercase=args.lowercase,
                words_limit=args.words_limit,
                all_named_entities=all_named_entities,
                cell_dict_overall=cell_dict_overall)
            if mtl > max_table_length:
                max_table_length = mtl
        elif args.activity == _gather_stats_descr:
            print(f"working with {input_path}")
            if os.path.basename(input_path) == "train.json":
                train_dict = gather_json_stats(
                    input_path,
                    logger,
                    transform_player_names=args.transform_players)
                if args.five_occurrences:
                    train_dict = train_dict.sort(prun_occurrences=5)
            else:
                gather_json_stats(
                    input_path,
                    logger,
                    train_dict,
                    transform_player_names=args.transform_players)
        elif args.activity == _create_dataset_descr:
            create_dataset(input_path,
                           output_path,
                           total_vocab,
                           max_plan_length=max_plan_length,
                           max_summary_length=max_summary_length,
                           max_table_length=max_table_length,
                           logger=logger)

    if args.activity == _extract_activity_descr and args.entity_vocab_path is not None:
        all_named_entities.sort().save(args.entity_vocab_path)
    if args.activity == _extract_activity_descr and args.cell_vocab_path is not None:
        cell_dict_overall.sort().save(args.cell_vocab_path)
    if args.activity == _extract_activity_descr and args.config_path is not None:
        with open(args.config_path, "w") as f:
            print(max_table_length, file=f)
Example #29
import os
import tensorflow as tf

from multiclass_3D_CNN import buildModel
from create_dataset import create_dataset
#from create_dataset_copy import create_dataset

config = tf.ConfigProto()
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
#config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))

if __name__ == '__main__':

    print('begin creating the input dataset')
    data_dir = '/home/d1274/no_backup/d1274/data'

    datagen = create_dataset(data_dir)

    # The image shape is (236, 320, 260)
    #model = Resnet3DBuilder.build_resnet_50((64, 64, 64, 1), 3)
    model = buildModel((64, 64, 64, 1), 3)
    print(model.summary())

    model.compile(loss="categorical_crossentropy",
                  optimizer="sgd",
                  metrics=['acc'])

    model_dir = os.path.join('/home/d1274/no_backup/d1274/model',
                             "IQA_test_with_tfrecord")
    os.makedirs(model_dir, exist_ok=True)
    print("model_dir: ", model_dir)
    est_iqa = tf.keras.estimator.model_to_estimator(keras_model=model,
Example #30
               marker='o',
               s=4)
    ax.scatter([a[indeces_plot[0]], b[indeces_plot[0]]],
               [a[indeces_plot[1]], b[indeces_plot[1]]],
               marker='*',
               s=100)
    plt.figure()
    fig = plt.gcf()
    ax = fig.gca()
    ax.scatter(data_sample[indeces_plot[2], :],
               data_sample[indeces_plot[3], :],
               marker='o',
               s=4)
    ax.scatter([a[indeces_plot[2]], b[indeces_plot[2]]],
               [a[indeces_plot[3]], b[indeces_plot[3]]],
               marker='*',
               s=100)

sampler_global = region_sampler()
generate_data_from_coefs(file_name, initial_coef, sampler_global,
                         assign_region, size_dataset, n_parameters)

file_name = create_dataset(n_parameters,
                           assign_region,
                           n_parameter_region,
                           size_dataset,
                           file_name=file_name,
                           initial_coef=initial_coef)

stophere  # undefined name: raises NameError and halts the script here