def main():
    # read args
    args = u.read_args()
    u.create_directories(args)

    # create regression model
    c = Regression(args)
    # if the training flag is set, build the model and train it
    if args['train']:

        model = c.build()
        plot_model(model,
                   to_file='regression.png',
                   show_layer_names=False,
                   show_shapes=False)
        operator = Train(model, args)
        operator.train()

    # if the test flag is set, load the best model and test it
    if args['test']:
        # load data only, without creating the model
        operator = Train(None, args)
        true, predicted = operator.load()

        plt.plot(true, color='red', label='true')
        plt.plot(predicted, color='blue', label='predicted')
        plt.legend()
        plt.show()
Example #2
def compute_similarity(input_folder, save=False, output_folder="similarity/"):
    artists_list = []
    elvis_files = glob.glob(input_folder + "/*.json")
    prefix = "_".join(input_folder.split("/"))
    utils.create_directories(output_folder)
    output_matrix = output_folder + "/" + prefix + "_similarity_matrix.npy"
    output_index = output_folder + "/" + prefix + "_artists_list.tsv"
    graphs = []
    for file in elvis_files:
        G = nx.Graph()
        data = json.load(codecs.open(file, "r", "utf-8"))
        filename = file[file.rfind("/") + 1 : -5]
        G.add_node(filename)
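        # link the artist node to every entity URI mentioned in its sentences (star graph)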
        for sentence in data:
            for entity in sentence["entities"]:
                G.add_edge(filename, entity["uri"])
        graphs.append(G)
        artists_list.append(filename)

    sim_matrix = np.zeros((len(graphs), len(graphs)))
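    # fill the upper triangle with the maximal-common-subgraph score and mirror it (the matrix is symmetric)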
    for i in range(0, len(graphs)):
        for j in range(i, len(graphs)):
            mcs = _maximal_common(graphs[i], graphs[j])
            sim_matrix[i, j] = mcs
            sim_matrix[j, i] = mcs

    if save:
        np.save(output_matrix, sim_matrix)
        fw = open(output_index, "w")
        fw.write("\n".join(artists_list))
        fw.close()
    return sim_matrix, artists_list
Example #3
def process_folder(technique, input_folder, output_folder="", tokenize=True, start_index=0, end_index=None):
    if output_folder == "":
        output_folder = 'entities/' + input_folder[input_folder.rfind('/')+1:] + "/" + technique
    utils.create_directories(output_folder)
    input_filenames = sorted(list(glob.glob(input_folder+"/*.txt")))
    i = 0
    for input_filename in input_filenames[start_index:end_index]:
        suffix = input_filename[input_filename.rfind("/")+1:-4]
        output_filename = output_folder+"/"+suffix+".json"
        if not os.path.exists(output_filename):
            if tokenize:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    text = f.read()
                sentences = sent_tokenize(text)
            else:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    sentences = [line for line in f]
            ner_sentences = []
            if technique == 'tagme':
                ner_sentences = tagme(sentences)
            elif technique == 'babelfy':
                ner_sentences = babelfy(sentences)
            elif technique == 'spotlight':
                ner_sentences = spotlight(sentences)
            json.dump(ner_sentences, codecs.open(output_filename, "w", "utf-8"))
        i += 1
        sys.stdout.write("\rProcessing Data: %d of %d" % (i, len(input_filenames[start_index:end_index])))
        sys.stdout.flush()
Example #4
def compute_similarity(input_folder, save=False, output_folder='similarity/'):
    artists_list = []
    elvis_files = glob.glob(input_folder + "/*.json")
    prefix = "_".join(input_folder.split('/'))
    utils.create_directories(output_folder)
    output_matrix = output_folder + "/" + prefix + "_similarity_matrix.npy"
    output_index = output_folder + "/" + prefix + "_artists_list.tsv"
    graphs = []
    for file in elvis_files:
        G = nx.Graph()
        data = json.load(codecs.open(file, "r", "utf-8"))
        filename = file[file.rfind("/") + 1:-5]
        G.add_node(filename)
        for sentence in data:
            for entity in sentence['entities']:
                G.add_edge(filename, entity['uri'])
        graphs.append(G)
        artists_list.append(filename)

    sim_matrix = np.zeros((len(graphs), len(graphs)))
    for i in range(0, len(graphs)):
        for j in range(i, len(graphs)):
            mcs = _maximal_common(graphs[i], graphs[j])
            sim_matrix[i, j] = mcs
            sim_matrix[j, i] = mcs

    if save:
        np.save(output_matrix, sim_matrix)
        fw = open(output_index, 'w')
        fw.write("\n".join(artists_list))
        fw.close()
    return sim_matrix, artists_list
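Example #5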
def prepare_data(train_dir):
    """Prepare data for training"""
    # No need to create training or val data directory if they already exist
    if osp.isdir(cfg.train_data_dir) and osp.isdir(cfg.val_data_dir):
        print("Using existing data directories: \n{}\n{}\n".format(
            cfg.train_data_dir, cfg.val_data_dir))
        # Still need to set number of training and val images
        for class_name in cfg.classes:
            cfg.nb_train_samples += len(
                os.listdir(osp.join(cfg.train_data_dir, class_name)))
            cfg.nb_val_samples += len(
                os.listdir(osp.join(cfg.val_data_dir, class_name)))
    else:
        print("Loading training images...\n")
        # Load all training images from given directory
        imgs, _, img_paths = load_train_dir(train_dir)

        # Split into training (80%) and val (20%) sets
        train_imgs, val_imgs, train_img_paths, val_img_paths = train_test_split(
            imgs, img_paths, test_size=0.20, random_state=seed)

        # Set number of training samples and val samples
        cfg.nb_train_samples = len(train_imgs)
        cfg.nb_val_samples = len(val_imgs)

        # Create data directories for training and val data
        for class_name in cfg.classes:
            create_directories(osp.join(cfg.train_data_dir, class_name))
            create_directories(osp.join(cfg.val_data_dir, class_name))

        print("Writing images to training data directory.\n")
        write_data_directory(train_imgs, train_img_paths, cfg.train_data_dir)
        print("Writing images to val data directory.\n")
        write_data_directory(val_imgs, val_img_paths, cfg.val_data_dir)
Example #6
def main(config):

    if config.task == 'train':
        config.train = 1
    else:
        config.train = 0

    if config.dataset == 'life':
        config.task = 'regression'
        config.experiment = 'train-test'
    else:
        config.task = 'classification'
        config.experiment = 'doublecv'

    config.expt_name = "Exp" + str(
        config.experiment
    ) + "_" + config.mod_split + "_" + config.build_model + "_" + config.last_layer

    # Create save directories
    utils.create_directories(config)
    data = load_dataset(config)

    if config.experiment == 'mar_doublecv' or config.experiment == 'doublecv':
        n_feature_sets = len(data.keys()) - 1
    elif config.dataset == 'life':
        n_feature_sets = int(len(data.keys()) / 2) - 1

    X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)]
    y = np.array(data['y'])

    X_test = None
    y_test = None

    if config.task == 'classification':
        config.n_classes = len(set(y))

    if config.dataset == 'life':
        X_test = [
            np.array(data['{}_test'.format(i)]) for i in range(n_feature_sets)
        ]
        y_test = np.array(data['y_test'])

    config.n_feature_sets = n_feature_sets
    config.feature_split_lengths = [i.shape[1] for i in X]

    if config.verbose > 0:
        print('Dataset used ', config.dataset)
        print('Number of feature sets ', n_feature_sets)
        for e, i in enumerate(X):
            print('Shape of feature set {} {}'.format(e, np.array(i).shape))

    trainer.train(X, y, config, X_test, y_test)

    print(config.expt_name)
    print(config.dataset)
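Example #7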
def train():
    create_directories()
    # start recording summaries
    log_summaries()

    writer = tf.train.SummaryWriter(
        logdir,
        graph=train_sess.graph
    )

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    init = tf.global_variables_initializer()

    train_sess.run(init)
    print('Initialized Variables...')

    print('Training...')
    print('Launch TensorBoard to see metrics.')

    for i in range(n_iter):
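        # one optimizer step per mini-batch; merged summaries are written for TensorBoard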
        batch = mnist.train.next_batch(batch_size)
        _, summ = train_sess.run(
          [optimizer, summaries],
          feed_dict={x: batch[0], y: batch[1], keep_prob: dropout_prob}
        )

        writer.add_summary(summ, global_step=i)

    print('')
    # done training, calculate acc on test set
    print("Test Accuracy: %g" % train_sess.run(
        accuracy,
        feed_dict={
            x: mnist.test.images,
            y: mnist.test.labels,
            keep_prob: 1.0
        }
    ))

    print('')

    # save if desired
    while True:
        prompt = raw_input('Do you wish to save model weights? [y/N] ')

        if prompt == 'y':
            fname = raw_input('Enter filename > ')
            save_path = saver.save(train_sess, path.join(savedir, fname))
            print('Model saved at ' + save_path)
            break
        elif prompt == 'N':
            break

    # close files and sessions
    writer.close()
    train_sess.close()
Example #8
def voting(source, level):
    tools = ['babelfy', 'tagme', 'spotlight']
    filenames = sorted(list(glob.glob(source + "/" + tools[0] + "/*.json")))
    output_folder = source + "/agreement_" + str(level) + "/"
    utils.create_directories(output_folder)
    n = 0
    for file in filenames:
        output_sentences = []
        name = file[file.rfind("/") + 1:]
        sentences = json.load(codecs.open(file, "r", "utf-8"))
        ner_file = dict()
        ner_sentences = dict()
        for tool in tools:
            ner_file[tool] = source + "/" + tool + "/" + name
            ner_sentences[tool] = json.load(
                codecs.open(ner_file[tool], "r", "utf-8"))
        i = 0
        for i in range(0, len(sentences)):
            sentence = dict()
            sentence['text'] = sentences[i]['text']
            sentence['index'] = sentences[i]['index']
            sentence['entities'] = []
            entities = dict()
            all_entities = dict()
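            # collect (startChar, endChar, uri) triples per tool so set intersections can find agreement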
            for tool in tools:
                entities[tool] = set()
                for entity in ner_sentences[tool][i]['entities']:
                    entities[tool].add((entity['startChar'], entity['endChar'],
                                        entity['uri']))
                    all_entities[(entity['startChar'], entity['endChar'],
                                  entity['uri'])] = entity
            agreement3 = entities[tools[0]].intersection(
                entities[tools[1]]).intersection(entities[tools[2]])
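            # level 3 requires all three tools to agree; level 2 accepts agreement between any two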
            if level == 3:
                agreement = agreement3
            elif level == 2:
                inter1 = entities[tools[0]].intersection(entities[tools[1]])
                inter2 = entities[tools[0]].intersection(entities[tools[2]])
                inter3 = entities[tools[1]].intersection(entities[tools[2]])
                agreement = inter1.union(inter2).union(inter3)
            for entity_key in agreement:
                entity = all_entities[entity_key]
                if level == 3 or (level == 2 and entity_key in agreement3):
                    entity['confidence'] = 3
                else:
                    entity['confidence'] = 2
                sentence['entities'].append(entity)
            output_sentences.append(sentence)
        json.dump(output_sentences,
                  codecs.open(output_folder + name, 'w', 'utf-8'))
        n += 1
        if n % 1000 == 0:
            print(n)
Example #9
def voting(source,level):
	tools = ['babelfy','tagme','spotlight']
	filenames = sorted(list(glob.glob(source+"/"+tools[0]+"/*.json")))
	output_folder = source+"/agreement_"+str(level)+"/"
	utils.create_directories(output_folder)
	n = 0
	for file in filenames:
		output_sentences = []
		name = file[file.rfind("/")+1:]	
		sentences = json.load(codecs.open(file,"r", "utf-8"))
		ner_file = dict()
		ner_sentences = dict()
		for tool in tools:
			ner_file[tool] = source+"/"+tool+"/"+name
			ner_sentences[tool] = json.load(codecs.open(ner_file[tool],"r", "utf-8"))
		i = 0
		for i in range(0,len(sentences)):
			sentence = dict()
			sentence['text'] = sentences[i]['text']
			sentence['index'] = sentences[i]['index']
			sentence['entities'] = []
			entities = dict()
			all_entities = dict()
			for tool in tools:
				entities[tool] = set()
				for entity in ner_sentences[tool][i]['entities']:
					entities[tool].add((entity['startChar'],entity['endChar'],entity['uri']))
					all_entities[(entity['startChar'],entity['endChar'],entity['uri'])] = entity
			agreement3 = entities[tools[0]].intersection(entities[tools[1]]).intersection(entities[tools[2]])
			if level == 3:
				agreement = agreement3
			elif level == 2:
				inter1 = entities[tools[0]].intersection(entities[tools[1]])
				inter2 = entities[tools[0]].intersection(entities[tools[2]])
				inter3 = entities[tools[1]].intersection(entities[tools[2]])
				agreement = inter1.union(inter2).union(inter3)
			for entity_key in agreement:
				entity = all_entities[entity_key]
				if level == 3 or (level == 2 and entity_key in agreement3):
					entity['confidence'] = 3
				else:
					entity['confidence'] = 2
				sentence['entities'].append(entity)
			output_sentences.append(sentence)
		json.dump(output_sentences, codecs.open(output_folder+name,'w','utf-8'))
		n += 1
		if n % 1000 == 0:
			print(n)
Example #10
    def __init__(self, opts, load=False):
        self.sess = tf.Session()

        self.opts = opts
        utils.opts_check(self)

        self.z_dim = self.opts['z_dim']
        self.batch_size = self.opts['batch_size']
        self.train_data, self.test_data = utils.load_data(self, seed=0)

        self.data_dims = self.train_data.shape[1:]
        self.input = tf.placeholder(tf.float32, (None, ) + self.data_dims,
                                    name="input")

        self.losses_train = []
        self.losses_test_random = []
        self.losses_test_fixed = []

        self.experiment_path = self.opts['experiment_path']

        if load is False:
            utils.create_directories(self)
            utils.save_opts(self)
            utils.copy_all_code(self)

        models.encoder_init(self)
        models.decoder_init(self)
        models.prior_init(self)
        models.loss_init(self)
        models.optimizer_init(self)
        if 'data_augmentation' in self.opts and self.opts['data_augmentation'] is True:
            models.data_augmentation_init(self)

        self.fixed_test_sample = self.sample_minibatch(test=True, seed=0)
        self.fixed_train_sample = self.sample_minibatch(test=False, seed=0)
        self.fixed_codes = self.sample_codes(seed=0)

        if self.opts['make_pictures_every'] is not None:
            utils.plot_all_init(self)

        self.saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)
        self.sess.run(tf.global_variables_initializer())

        if load is True:
            self.load_saved_model()
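Example #11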
def main(config):

    # Create save directories
    utils.create_directories(config)

    # Prepare and load the data
    data = dataset.prepare_data(config.dataset_dir, config)

    # Train the ensemble models
    # if config.training_type == 'bagging':
    # 	ensemble_trainer.bagging_ensemble_training(data, config)
    # elif config.training_type == 'boosting':
    # 	ensemble_trainer.boosted_ensemble_training(data, config)

    # Evaluate the model
    test_data = dataset.prepare_test_data(config.test_dataset_dir, config)
    evaluator.evaluate(data, test_data, config)

    print(config.model_dir, config.boosting_type, config.voting_type)
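Example #12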
def main(config):

    # Create save directories
    utils.create_directories(config)

    # Prepare and load the data
    if 'silences' in config.model_types:
        data = dataset.prepare_data_new(config.dataset_dir, config)
    else:
        data = dataset.prepare_data(config.dataset_dir, config)
    # print(data)
    # return
    # Train the ensemble models
    if config.training_type == 'bagging':
        ensemble_trainer.bagging_ensemble_training(data, config)
    elif config.training_type == 'boosting':
        ensemble_trainer.boosted_ensemble_training(data, config)

    # Evaluate the model
    if 'silences' not in config.model_types:
        test_data = dataset.prepare_test_data(config.test_dataset_dir, config)
        evaluator.evaluate(data, test_data, config)
Example #13
def process_folder(technique,
                   input_folder,
                   output_folder="",
                   tokenize=True,
                   start_index=0,
                   end_index=None):
    if output_folder == "":
        output_folder = 'entities/' + input_folder[input_folder.rfind('/') +
                                                   1:] + "/" + technique
    utils.create_directories(output_folder)
    input_filenames = sorted(list(glob.glob(input_folder + "/*.txt")))
    i = 0
    for input_filename in input_filenames[start_index:end_index]:
        suffix = input_filename[input_filename.rfind("/") + 1:-4]
        output_filename = output_folder + "/" + suffix + ".json"
        if not os.path.exists(output_filename):
            if tokenize:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    text = f.read()
                sentences = sent_tokenize(text)
            else:
                with codecs.open(input_filename, "r", "utf-8") as f:
                    sentences = [line for line in f]
            ner_sentences = []
            if technique == 'tagme':
                ner_sentences = tagme(sentences)
            elif technique == 'babelfy':
                ner_sentences = babelfy(sentences)
            elif technique == 'spotlight':
                ner_sentences = spotlight(sentences)
            json.dump(ner_sentences, codecs.open(output_filename, "w",
                                                 "utf-8"))
        i += 1
        sys.stdout.write("\rProcessing Data: %d of %d" %
                         (i, len(input_filenames[start_index:end_index])))
        sys.stdout.flush()
Example #14
def main():
    # read args
    args = u.read_args()
    u.create_directories(args)

    # create classification model
    c = Classifier(args)

    # if the training flag is set, build the model and train it
    if args['train']:

        model = c.build()
        plot_model(model,
                   to_file=args['exp_dir'] + 'modelimage' + '.png',
                   show_layer_names=False,
                   show_shapes=False)
        operator = Train(model, args)
        operator.train()
        operator.validate()

    # if the test flag is set, load the best model and test it
    if args['test']:
        # load data only, without creating the model
        operator = Train(None, args)
        operator.validate()
        true, predicted = operator.test()

        # plot confusion matrix
        class_names = ['0', '1']
        cf = confusion_matrix(true, predicted)
        plt.figure()
        u.plot_confusion_matrix(
            cf,
            classes=class_names,
            normalize=False,
            title='Confusion matrix, without normalization')
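Example #15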
def train(source, target):

    scaled_logits, src_acc, trgt_acc, grad = build_graph(source, target)

    init = tf.global_variables_initializer()
    summaries = tf.merge_all_summaries()

    if not path.isdir(savedir):
        print('No models found. Start training.')
        covnet_model.train()

    create_directories()

    if raw_input('Do you want to use your own weights? [y/N] ') == 'y':
        fname = raw_input('Enter saved model name > ')
        weights = path.join(savedir, fname)
    else:
        weights = path.join(savedir, 'default')

    with tf.Session() as sess:
        sess.run(init)
        covnet_model.saver.restore(sess, weights)
        print('Weights restored.')

        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

        writer = tf.train.SummaryWriter(logdir, graph=sess.graph)

        src_images, src_labels = get_class(source, mnist.test.images,
                                           mnist.test.labels)

        # pick a random image that is correctly classified by CNN
        k = 0
        while True:
            original = src_images[np.newaxis, k]
            label = src_labels[np.newaxis, k]
            image = np.copy(original)

            l = scaled_logits.eval(
                feed_dict={
                    covnet_model.x: original,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })

            if np.argmax(l) == source:
                # correctly classified
                break

        print('Generating Adversarial Image...')
        print('Open tensorboard to visualize.')

        # train loop
        i = 0
        target_acc = 0.
        start_acc = []

        while target_acc < .99:  # fool to 99% acc
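            # one step: evaluate source/target accuracies, fetch the input gradient, and log summaries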
            source_acc, target_acc, dimg, summ = sess.run(
                [src_acc, trgt_acc, grad, summaries],
                feed_dict={
                    covnet_model.x: image,
                    covnet_model.y: label,
                    covnet_model.keep_prob: 1.
                })

            if i == 0:
                start_acc.extend([source_acc, target_acc])

            writer.add_summary(summ, global_step=i)

            image = image + learning_rate * dimg.reshape(1, 28 * 28)

            diff = np.abs(original - image)

            print("%d  source_acc %.5f, target_acc %.5f, sum: %.5f" %
                  (i, source_acc, target_acc, np.sum(diff)))

            i += 1

        print('Adversarial example generated.')

        # Show the example
        fig = plt.figure(figsize=(30, 10))

        plt.subplot(131)
        plt.imshow(original.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Original. source: (%f), target: (%f)' % tuple(start_acc))

        plt.subplot(132)
        plt.imshow(diff.reshape(28, 28), cmap='gray')
        plt.title('Delta (%f)' % np.sum(diff))
        plt.axis('off')

        plt.subplot(133)
        plt.imshow(image.reshape(28, 28), cmap='gray')
        plt.axis('off')
        plt.title('Adversarial source: (%f), target: (%f)' %
                  (source_acc, target_acc))

        plt.show()

        # ask to save
        while True:
            prompt = raw_input('Do you want to save this example? [y/N] ')

            if prompt == 'y':
                fname = raw_input(
                    'Enter name of npy file without extension > ')
                np.savez(path.join(exampledir, fname),
                         source=original,
                         delta=diff,
                         target=image,
                         source_acc=source_acc,
                         target_acc=target_acc)
                break
            elif prompt == 'N':
                break

        covnet_model.train_sess.close()
Example #16
    model_prefix = args.dataset + '_'

    # classifier to extract features (for fid score computation)
    try:
        classifier = torch.load('classifier.pt', map_location='cpu')
        classifier.eval()
        print('Classifier loaded!')
    except FileNotFoundError:
        classifier = Classifier()
        sys.exit("Need to train a classifier!")
        # TODO: train classifier

    # directories for generated samples
    dir_results = model_prefix + 'results'
    dir_samples = model_prefix + 'samples'
    create_directories(dir_results, dir_samples)

    ########## TEST MODE ##########
    if args.epochs is None:
        # load generator
        G = torch.load(model_prefix + 'generator.pt').to(device)
        print('Generator loaded!')
        # generate samples
        generate_samples(G, dir_samples, args.batchsize, num_samples=4096)
        sampleloader = get_sample_loader(dir_samples, args.batchsize,
                                         image_size)
        print('Samples generated!')
        # compute fid score with test set
        fid_score = get_fid_score(classifier, sampleloader, testloader)
        sys.exit("FID score from test set: " + str(fid_score))
def test():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    epoch_no = FLAGS.epoch
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data

    FNN_small_test = FNNDataset(data_dir / ('FNN_small_test.pkl'),
                                GloVe_vectors, ELMo)
    FNN_DL_small_test = data.DataLoader(dataset=FNN_small_test,
                                        batch_size=BATCH_SIZE_FN,
                                        num_workers=0,
                                        shuffle=True,
                                        drop_last=True,
                                        collate_fn=PadSortBatchFNN())
    print('Uploaded FNN data.')

    print('Initializing the model...', end=' ')

    model = initialize_han(input_dim, sent_hidden_dim, doc_hidden_dim,
                           NUM_CLASSES_FN, DEVICE)

    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    print('Loading model weights.')
    #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt'))
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        model = load_model(model_path, model, checkpoint=False)
        #_, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        model = load_model(checkpoint_path, model, checkpoint=True)
        #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer)

    model.eval()
    loss_func_fn = nn.CrossEntropyLoss()
    y_pred = []
    y_true = []
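    # run inference batch by batch, collecting predictions and gold labels for the metrics below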
    for step, batch in enumerate(FNN_DL_small_test):
        articles, article_dims, labels = batch
        out = model(batch=articles, batch_dims=article_dims)
        y_pred.append(out.argmax(dim=1).to(DEVICE).item())
        y_true.append(labels.to(DEVICE).item())
        if step % 100 == 0 and step != 0:
            print(
                sum(1 for x, y in zip(y_pred, y_true) if x == y) / len(y_pred))
            #print(sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average='micro'))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average='macro'))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average=None))
Example #18
def train_llp(file_name,
              model_name,
              useGPU2,
              constit_input,
              track_input,
              MSeg_input,
              jet_input,
              plt_model=False,
              frac=1.0,
              batch_size=5000,
              reg_value=0.001,
              dropout_value=0.1,
              epochs=50,
              learning_rate=0.002,
              hidden_fraction=1,
              kfold=None):
    """
    Takes in arguments to change architecture of network, does training, then runs evaluate_training
    :param file_name: Name of the .pkl file containing all the data
    :param model_name: Name of the model
    :param useGPU2: True to use GPU2
    :param constit_input: ModelInput object for constituents
    :param track_input: ModelInput object for tracks
    :param MSeg_input: ModelInput object for muon segments
    :param jet_input: ModelInput object for jets
    :param plt_model: True to save model architecture to disk
    :param frac: Fraction of events to use in file_name
    :param batch_size: Number of training examples in one forward/backward pass
    :param reg_value: Value of regularizer term for LSTM
    :param dropout_value: Fraction of the input units to drop
    :param epochs: Number of epochs to train the model
    :param learning_rate: Learning rate
    :param hidden_fraction: Fraction by which to multiply the dense layers
    :param kfold: KFold object to do KFold cross validation
    """
    # Setup directories
    print("\nSetting up directories...\n")
    dir_name = create_directories(
        model_name,
        os.path.split(os.path.splitext(file_name)[0])[1])

    # Write a file with some details of architecture, will append final stats at end of training
    print("\nWriting to file training details...\n")
    f = open("plots/" + dir_name + "/training_details.txt", "w+")
    f.write("File name\n")
    f.write(file_name + "\n")
    f.write("\nModel name\n")
    f.write(model_name + "\n")
    f.write("\nModelInput objects\n")
    f.write(str(vars(constit_input)) + "\n")
    f.write(str(vars(track_input)) + "\n")
    f.write(str(vars(MSeg_input)) + "\n")
    f.write(str(vars(jet_input)) + "\n")
    f.write("\nOther hyperparameters\n")
    f.write(
        "frac = %s\nbatch_size = %s\nreg_value = %s\ndropout_value = %s\nepochs = %s\nlearning_rate = %s\n"
        "hidden_fraction = %s\n" % (frac, batch_size, reg_value, dropout_value,
                                    epochs, learning_rate, hidden_fraction))
    f.close()

    # Do Keras_setup
    print("\nSetting up Keras...\n")
    keras_setup()

    # Choose GPU
    if useGPU2:
        os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # Load dataset
    print("\nLoading up dataset " + file_name + "...\n")
    df = load_dataset(file_name)

    # Extract labels
    Y = df['label']
    # Use pt flattened weights from pre-processing for weights
    weights = df['flatWeight']
    # Keep mcWeights
    mcWeights = df['mcEventWeight']
    # Hard code start and end of names of variables
    X = df.loc[:, 'clus_pt_0':'nn_MSeg_t0_29']
    X = df.loc[:, 'jet_pt':'jet_phi'].join(X)

    # Label Z as parametrized variables
    Z = df.loc[:, 'llp_mH':'llp_mS']

    # Save memory
    del df

    # Handle case if no KFold
    if kfold is None:
        # Split data into train/test datasets
        X_train, X_test, y_train, y_test, weights_train, weights_test, mcWeights_train, mcWeights_test, Z_train, Z_test = \
            train_test_split(X, Y, weights, mcWeights, Z, test_size=0.2)

        # Delete variables to save memory
        del X
        del Y
        del Z

        # Call method that prepares data, builds model architecture, trains model, and evaluates model
        roc_auc, test_acc = build_train_evaluate_model(
            constit_input, track_input, MSeg_input, jet_input, X_train, X_test,
            y_train, y_test, mcWeights_train, mcWeights_test, weights_train,
            weights_test, Z_test, Z_train, reg_value, frac, dropout_value,
            hidden_fraction, plt_model, batch_size, dir_name, learning_rate,
            epochs)

        return roc_auc, test_acc, dir_name

    else:
        # initialize lists to store metrics
        roc_scores, acc_scores = list(), list()
        # initialize counter for current fold iteration
        n_folds = 0
        # do KFold Cross Validation
        for train_ix, test_ix in kfold.split(X, Y):
            n_folds += 1
            print("\nDoing KFold iteration # %.0f...\n" % n_folds)
            # select samples
            X_train, y_train, weights_train, mcWeights_train, Z_train = \
                X.iloc[train_ix], Y.iloc[train_ix], weights.iloc[train_ix], mcWeights.iloc[train_ix], Z.iloc[train_ix]
            X_test, y_test, weights_test, mcWeights_test, Z_test = \
                X.iloc[test_ix], Y.iloc[test_ix], weights.iloc[test_ix], mcWeights.iloc[test_ix], Z.iloc[test_ix]

            # Call method that prepares data, builds model architecture, trains model, and evaluates model
            roc_auc, test_acc = build_train_evaluate_model(
                constit_input, track_input, MSeg_input, jet_input, X_train,
                X_test, y_train, y_test, mcWeights_train, mcWeights_test,
                weights_train, weights_test, Z_test, Z_train, reg_value, frac,
                dropout_value, hidden_fraction, plt_model, batch_size,
                dir_name, learning_rate, epochs, kfold, n_folds)

            roc_scores.append(roc_auc)
            acc_scores.append(test_acc)

        return roc_scores, acc_scores, dir_name
Example #19
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    run_desc_tl = FLAGS.run_desc_tl
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    checkpoints_dir_tl = Path(FLAGS.checkpoints_dir) / model_type / run_desc_tl
    models_dir_tl = Path(FLAGS.models_dir) / model_type / run_desc_tl
    results_dir_tl = Path(FLAGS.results_dir) / model_type / run_desc_tl
    learning_rate = FLAGS.learning_rate
    batch_size_fn = FLAGS.batch_size
    epoch_no = FLAGS.epoch
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir_tl, models_dir_tl, results_dir_tl)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data
    keys = ['train', 'test', 'val']
    FNN_DL_small = {}
    for i in keys:
        FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'),
                              GloVe_vectors, ELMo)
        FNN_DL_temp = data.DataLoader(dataset=FNN_temp,
                                      batch_size=batch_size_fn,
                                      num_workers=0,
                                      shuffle=True,
                                      drop_last=True,
                                      collate_fn=PadSortBatchFNN())
        FNN_DL_small[i] = FNN_DL_temp
    print('Uploaded FNN data.')

    # initialize the model, according to the model type
    print('Initializing the model for transfer learning...', end=' ')

    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     sent_hidden_dim=sent_hidden_dim,
                                     doc_hidden_dim=doc_hidden_dim,
                                     num_classes=NUM_CLASSES_FN,
                                     dropout=0).to(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()
    print('Working on: ', end='')
    print(DEVICE)

    # set the criterion and optimizer (class [0] is real, class [1] is fake)
    loss_func_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

    # load the last checkpoint (if it exists)
    results = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        _, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        _, _, _ = load_checkpoint(checkpoint_path, model, optimizer)
    print(f'Starting transfer learning on the model extracted from {epoch_no}')
    epoch = 0
    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL_small['train'],
                                                     model, optimizer,
                                                     loss_func_fn)

        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL_small['val'], model,
                                                loss_func_fn)

        results['epoch'].append(i)
        results['train_loss'].append(train_loss_fn)
        results['train_accuracy'].append(train_acc_fn)
        results['val_loss'].append(val_loss_fn)
        results['val_accuracy'].append(val_acc_fn)
        #print(results)
        best_accuracy = torch.tensor(val_acc_fn).max().item()
        create_checkpoint(checkpoints_dir_tl, i, model, optimizer, results,
                          best_accuracy)

    # save and plot the results
    save_results(results_dir_tl, results, model)
    save_model(models_dir_tl, model)
Example #20
from profile import RunPipeline
import argparse

parser = argparse.ArgumentParser(description="Run the profiling pipeline")
parser.add_argument("--config", help="Config file")

args = parser.parse_args()

pipeline, profile_config = load_pipeline(config_file=args.config)

run_pipeline = RunPipeline(pipeline=pipeline, profile_config=profile_config)

for batch in profile_config:
    print(f"Now processing... batch: {batch}")
    for plate in profile_config[batch]:
        create_directories(batch=batch, plate=plate, pipeline=pipeline)

        if "aggregate" in pipeline:
            if pipeline["aggregate"]["perform"]:
                print(f"Now aggregating... plate: {plate}")
                run_pipeline.pipeline_aggregate(batch=batch, plate=plate)

        if "annotate" in pipeline:
            if pipeline["annotate"]["perform"]:
                print(f"Now annotating... plate: {plate}")
                run_pipeline.pipeline_annotate(batch=batch, plate=plate)

        if "normalize" in pipeline:
            if pipeline["normalize"]["perform"]:
                print(f"Now normalizing... plate: {plate}")
                run_pipeline.pipeline_normalize(batch=batch,
Example #21
from detect import detect_objects
import utils
import meraki

if __name__ == '__main__':
    """
    Steps:
        1) Create necessary directories;
        2) Connect to Meraki;
        3) Get a list of Meraki Cameras;
        4) For each camera:
            4.1) Downloads a snapshot of the current field of view of the camera;
            4.2) Runs the YOLOv3 model trained on the COCO dataset and stores the image locally.
    """
    utils.create_directories()
    api_key, organization_id, network_id, target_cameras, rtsp = utils.load_config_variables()
    if not api_key or not network_id:
        raise Exception('Meraki API Key and Meraki Network Id are mandatory params. You can hard code them above, '
                        'use a config.ini file or set them as environment variables. Camera serials should be a string '
                        'separated by ;. Camera serials are optional')

    dashboard = utils.establish_meraki_connection(api_key)
    cams = utils.get_cameras(dashboard, network_id, target_cameras)
    print(f'Will process snapshots of {len(cams)} MV cameras')
    if not cams:
        raise Exception(f'The network ({network_id}) used does not contain cameras or the cameras you selected are '
                        'not on the selected network.')
    else:
        for cam in cams:
            serial_number = cam['serial']
Example #22
def homogenize(technique, ner_folder, data='server'):
    # Check if remote server is working, otherwise use local files
    remote_working = _check_status()
    if not remote_working or data == 'local':
        print "Starting to load data from local files"
        _load_from_local(technique)
        print "Data loaded"

    if technique == 'all':
        techniques = ['spotlight', 'tagme', 'babelfy']
    else:
        techniques = [technique]

    for technique in techniques:
        output_folder = ner_folder + '/' + technique + "_h/" + "/"
        folder = ner_folder + '/' + technique
        utils.create_directories(output_folder)
        filenames = sorted(list(glob.glob(folder + "/*.json")))
        for file in filenames:
            name = file[file.rfind("/") + 1:]
            sentences = json.load(codecs.open(file, "r", "utf-8"))
            for sentence in sentences:
                entities = []
                for entity in sentence['entities']:
                    add = True
                    if technique.lower() == "spotlight":
                        uri = entity['uri']
                        ret_categories = _get_categories(
                            entity['uri'], use_remote=remote_working)
                        if ret_categories:
                            entity['categories'] = ret_categories
                        if entity['types'] == "":
                            ret_types = _get_types(uri,
                                                   use_remote=remote_working)
                            if ret_types:
                                entity['types'] = ",".join(ret_types)
                    elif technique.lower() == "tagme":
                        entity['types'] = ""
                        ret_id_dbpedia = _get_id_dbpedia(
                            entity['id'], use_remote=remote_working)
                        if ret_id_dbpedia:
                            entity['uri'] = ret_id_dbpedia

                        elif 'uri' in entity:
                            entity[
                                'uri'] = "http://dbpedia.org/resource/" + entity[
                                    'uri'].replace(" ", "_")
                        else:
                            entity['uri'] = "NONE"
                            add = False
                        uri = entity['uri']
                        ret_types = _get_types(uri, use_remote=remote_working)
                        if ret_types:
                            entity['types'] = ",".join(ret_types)

                        formated_categories = []
                        if 'categories' in entity:
                            for category in entity['categories']:
                                formated_categories.append(
                                    category.replace(" ", "_"))
                        entity['categories'] = formated_categories
                    elif technique.lower() == "babelfy":
                        entity['types'] = ""
                        if "dbpedia" in entity['uri']:
                            entity['uri'] = entity['uri'].replace(
                                "\\u0026", "&").replace("\\u0027", "'")
                            ret_categories = _get_categories(
                                entity['uri'], use_remote=remote_working)
                            if ret_categories:
                                entity['categories'] = ret_categories
                            ret_types = _get_types(entity['uri'],
                                                   use_remote=remote_working)
                            if ret_types:
                                entity['types'] = ",".join(ret_types)

                            entity['endChar'] += 1
                    ret_redirection = _get_redirections(
                        entity['uri'], use_remote=remote_working)
                    if ret_redirection:
                        entity['uri'] = ret_redirection
                    if add:
                        entities.append(entity)
                sentence['entities'] = entities
            print(output_folder)
            json.dump(sentences, codecs.open(output_folder + name, "w",
                                             "utf-8"))
            print(name)
Example #23
def train():
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir)
    models_dir = Path(FLAGS.models_dir)
    results_dir = Path(FLAGS.results_dir)

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')
    
    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)
    
    # load the data
    print('Loading the data...')
    adj_file = data_dir / 'adj_matrix.npz'
    features_file = data_dir / 'features_matrix.pkl'
    labels_file = data_dir / 'labels_matrix.pkl'
    splits_file = data_dir / 'splits_dict.pkl'
    adj, features, labels, splits_dict = load_data(adj_file, features_file, labels_file, splits_file)
    train_idxs = splits_dict['train']
    val_idxs = splits_dict['val']
    test_idxs = splits_dict['test']

    # initialize the model, according to the model type
    print('Initializing the model...')
    model = GraphConvolutionalNetwork(
            input_dim=features.shape[1], 
            hidden_dim=HIDDEN_DIM, 
            num_classes=labels.max().item() + 1,  
            dropout=DROPOUT
    ).to(DEVICE)
    # print_model_parameters(model)

    # set the criterion and optimizer
    print('Initializing the criterion and optimizer')
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(
        params=model.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY
    )        

    # initialize the results dict
    results = {
            'epoch': [],
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': []
    }    

    print(f'Starting training at epoch 1...')
    for i in range(0, MAX_EPOCHS):
        st = time()
        
        # train
        model.train()
        optimizer.zero_grad()
        
        # forward pass
        output = model(features, adj)
        
        # compute the training loss and accuracy
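        # label rows are reduced to class indices via argmax before computing the loss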
        train_targets = labels[train_idxs].max(dim=1).indices
        train_loss = criterion(output[train_idxs], train_targets)
        train_acc = accuracy(output[train_idxs], train_targets)
        
        # backpropagate the loss
        train_loss.backward()
        optimizer.step()
        
        # evaluate
        model.eval()
        output = model(features, adj)
        val_targets = labels[val_idxs].max(dim=1).indices
        val_loss = criterion(output[val_idxs], val_targets)
        val_acc = accuracy(output[val_idxs], val_targets)
                
        # record results
        results['epoch'].append(i)
        results['train_loss'].append(train_loss.item())        
        results['train_acc'].append(train_acc.item())
        results['val_loss'].append(val_loss.item())
        results['val_acc'].append(val_acc.item())
        
        # print update
        print(f'Epoch: {i+1:02d} Train loss: {train_loss.item():0.4f} Train acc: {train_acc:0.4f} Val loss: {val_loss.item():0.4f} Val acc: {val_acc:0.4f} done in {time() - st} s')

        # create a checkpoint
        create_checkpoint(checkpoints_dir, i, model, optimizer, results)

    # test
    model.eval()
    output = model(features, adj)
    test_targets = labels[test_idxs].max(dim=1).indices
    test_loss = criterion(output[test_idxs], test_targets)
    test_acc = accuracy(output[test_idxs], test_targets)

    # record results
    results['test_loss'] = test_loss.item()
    results['test_acc'] = test_acc.item()
    
    # save the model and results
    save_model(models_dir, model)
    save_results(results_dir, results, model)
Example #24
def homogenize(technique,ner_folder,data='server'):
    # Check if remote server is working, otherwise use local files
    remote_working = _check_status()
    if not remote_working or data=='local':
        print "Starting to load data from local files"
        _load_from_local(technique)
        print "Data loaded"

    if technique == 'all':
        techniques = ['spotlight','tagme','babelfy']
    else:
        techniques = [technique]

    for technique in techniques:
        output_folder = ner_folder + '/' + technique + "_h/" + "/"
        folder = ner_folder + '/' + technique
        utils.create_directories(output_folder)
        filenames = sorted(list(glob.glob(folder+"/*.json")))
        for file in filenames:
            name = file[file.rfind("/")+1:]
            sentences = json.load(codecs.open(file, "r", "utf-8"))
            for sentence in sentences:
                entities = []
                for entity in sentence['entities']:
                    add = True
                    if technique.lower() == "spotlight":
                        uri = entity['uri']
                        ret_categories = _get_categories(entity['uri'],
                                use_remote=remote_working)
                        if ret_categories:
                            entity['categories'] = ret_categories
                        if entity['types'] == "":
                            ret_types = _get_types(uri, use_remote=remote_working)
                            if ret_types:
                                entity['types'] = ",".join(ret_types)
                    elif technique.lower() == "tagme":
                        entity['types'] = ""
                        ret_id_dbpedia = _get_id_dbpedia(entity['id'],
                                 use_remote=remote_working)
                        if ret_id_dbpedia:
                            entity['uri'] = ret_id_dbpedia

                        elif 'uri' in entity:
                            entity['uri'] = "http://dbpedia.org/resource/"+entity['uri'].replace(" ","_")
                        else:
                            entity['uri'] = "NONE"
                            add = False
                        uri = entity['uri']
                        ret_types = _get_types(uri, use_remote=remote_working)
                        if ret_types:
                            entity['types'] = ",".join(ret_types)

                        formated_categories = []
                        if 'categories' in entity:
                            for category in entity['categories']:
                                formated_categories.append(category.replace(" ","_"))
                        entity['categories'] = formated_categories
                    elif technique.lower() == "babelfy":
                        entity['types'] = ""
                        if "dbpedia" in entity['uri']:
                            entity['uri'] = entity['uri'].replace("\\u0026","&").replace("\\u0027","'")
                            ret_categories = _get_categories(entity['uri'], use_remote=remote_working)
                            if ret_categories:
                                entity['categories'] = ret_categories
                            ret_types = _get_types(entity['uri'],
                                    use_remote=remote_working)
                            if ret_types:
                                entity['types'] = ",".join(ret_types)

                            entity['endChar'] += 1
                    ret_redirection = _get_redirections(entity['uri'],
                            use_remote=remote_working)
                    if ret_redirection:
                        entity['uri'] = ret_redirection
                    if add:
                        entities.append(entity)
                sentence['entities'] = entities
            print(output_folder)
            json.dump(sentences, codecs.open(output_folder+name, "w", "utf-8"))
            print(name)
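Example #25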
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    #data_percentage = FLAGS.data_percentage

    if model_type == 'STL':
        only_fn = True
    else:
        only_fn = False

    # check if data directory exists
    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embeddings
    GloVe_vectors = GloVe()
    print('Uploaded GloVe embeddings.')
    # ELMo = Elmo(
    #         options_file=ELMO_OPTIONS_FILE,
    #         weight_file=ELMO_WEIGHT_FILE,
    #         num_output_representations=1,
    #         requires_grad=False,
    #         dropout=0).to(DEVICE)
    # print('Uploaded Elmo embeddings.')
    # get the fnn and snli data
    FNN = {}
    FNN_DL = {}

    for path in ['train', 'val', 'test']:
        FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'),
                               GloVe_vectors)
        FNN_DL[path] = data.DataLoader(dataset=FNN[path],
                                       batch_size=BATCH_SIZE_FN,
                                       num_workers=0,
                                       shuffle=True,
                                       drop_last=True,
                                       collate_fn=PadSortBatch())
    print('Uploaded FNN data.')
    if not only_fn:
        SNLI = {}
        SNLI_DL = {}
        for path in ['train', 'val', 'test']:
            SNLI[path] = SNLIDataset(data_dir / ('SNLI_' + path + '.pkl'),
                                     GloVe_vectors)
            SNLI_DL[path] = data.DataLoader(dataset=SNLI[path],
                                            batch_size=BATCH_SIZE_NLI,
                                            num_workers=0,
                                            shuffle=True,
                                            drop_last=True,
                                            collate_fn=PadSortBatchSNLI())
        print('Uploaded SNLI data.')
        snli_train_sent_no = len(SNLI['train']) * 2
        snli_train_len = len(SNLI['train'])
    fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl')
    fnn_train_len = len(FNN['train'])

    # initialize the model, according to the model type
    print('Initializing the model...', end=' ')
    if model_type == 'MTL':
        NUM_CLASSES_NLI = 3
        print("Loading an MTL HAN model.")
    elif model_type == 'STL':
        NUM_CLASSES_NLI = None
        print("Loading an STL HAN model.")
    elif model_type == 'Transfer':
        print("Nothing for now.")
    # the ELMo embeddings are disabled above, so the input dimension is just the
    # GloVe word dimension; with ELMo enabled it would be WORD_EMBED_DIM + ELMO_EMBED_DIM
    input_dim = WORD_EMBED_DIM
    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     hidden_dim=WORD_HIDDEN_DIM,
                                     num_classes_task_fn=NUM_CLASSES_FN,
                                     embedding=None,
                                     num_classes_task_nli=NUM_CLASSES_NLI,
                                     dropout=0).to(DEVICE)
    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    # set the criterion and optimizer
    # weight the loss inversely to class frequency (class [0] is real, class [1] is fake)
    # so the under-represented class contributes proportionally more
    # (e.g. a 0.8/0.2 real-to-fake split yields weights [0.2, 0.8])
    real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl')
    weights = [(1.0 - real_ratio), (1.0 - fake_ratio)]
    print(weights)
    class_weights = torch.FloatTensor(weights).to(DEVICE)
    loss_func_fn = nn.CrossEntropyLoss(weight=class_weights)
    if not only_fn:
        loss_func_nli = nn.CrossEntropyLoss()
        temperature = 2
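        # `temperature` softens the loss ratios used in the MTL loss-weighting scheme below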
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    # load the last checkpoint (if it exists)
    epoch, results, best_accuracy = load_latest_checkpoint(
        checkpoints_dir, model, optimizer)
    results_fn = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    results_nli = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    results = {'fn': results_fn, 'nli': results_nli}
    if epoch == 0:
        print(f'Starting training at epoch {epoch + 1}...')
    else:
        print(f'Resuming training from epoch {epoch + 1}...')

    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        if only_fn:
            train_loss_fn, train_acc_fn = train_epoch_fn(
                FNN_DL['train'], model, optimizer, loss_func_fn)
        elif model_type == 'MTL':
            model.train()

            train_loss_fn = []
            train_acc_fn = []
            loss_fn_weight_gradnorm = 1

            train_loss_nli = []
            train_acc_nli = []
            loss_nli_weight_gradnorm = 1

            # alternative: weight the losses by sentence counts instead of dataset lengths
            #loss_fn_weight_dataset = 1 - fnn_train_sent_no / (fnn_train_sent_no + snli_train_sent_no)
            #loss_nli_weight_dataset = 1 - snli_train_sent_no / (fnn_train_sent_no + snli_train_sent_no)
            loss_fn_weight_dataset = 1 - fnn_train_len / (fnn_train_len +
                                                          snli_train_len)
            loss_nli_weight_dataset = 1 - snli_train_len / (fnn_train_len +
                                                            snli_train_len)

            chance_fn = 1000 * (fnn_train_len / BATCH_SIZE_FN) / (
                (fnn_train_len / BATCH_SIZE_FN) +
                (snli_train_len / BATCH_SIZE_NLI))
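            # chance_fn is the FNN share of all training batches, scaled to
            # [0, 1000) so it can be compared against np.random.randint(0, 1000) below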
            iterator_fnn = enumerate(FNN_DL['train'])
            iterator_snli = enumerate(SNLI_DL['train'])
            done_fnn, done_snli = False, False
            step_fnn = 0
            step_snli = 0
            print(
                f'Train set length, FNN: {fnn_train_len}. Train set length, SNLI: {snli_train_len}.'
            )
            print(
                f'Training set to batch size ratio for Fake News Detection is {fnn_train_len / BATCH_SIZE_FN}.'
            )
            print(
                f'Training set to batch size ratio for Language Inference is {snli_train_len / BATCH_SIZE_NLI}.'
            )

            while not (done_fnn and done_snli):
                if len(train_loss_fn) > 1 and len(train_loss_nli) > 1:
                    # computes loss weights based on the loss from the previous iterations
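                    # each task's ratio of its two most recent losses is exponentiated
                    # with the temperature and normalised across the two tasks; the
                    # result is then combined with the dataset-size weights through a
                    # second softmax to give the final per-task loss weights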
                    loss_fn_ratio = train_loss_fn[-1] / train_loss_fn[-2]
                    loss_nli_ratio = train_loss_nli[-1] / train_loss_nli[-2]
                    loss_fn_exp = math.exp(loss_fn_ratio / temperature)
                    loss_nli_exp = math.exp(loss_nli_ratio / temperature)
                    loss_fn_weight_gradnorm = loss_fn_exp / (loss_fn_exp +
                                                             loss_nli_exp)
                    loss_nli_weight_gradnorm = loss_nli_exp / (loss_fn_exp +
                                                               loss_nli_exp)
                    loss_fn_weight = math.exp(
                        loss_fn_weight_dataset * loss_fn_weight_gradnorm) / (
                            math.exp(loss_fn_weight_dataset *
                                     loss_fn_weight_gradnorm) +
                            math.exp(loss_nli_weight_dataset *
                                     loss_nli_weight_gradnorm))
                    loss_nli_weight = math.exp(
                        loss_nli_weight_dataset * loss_nli_weight_gradnorm) / (
                            math.exp(loss_fn_weight_dataset *
                                     loss_fn_weight_gradnorm) +
                            math.exp(loss_nli_weight_dataset *
                                     loss_nli_weight_gradnorm))
                else:
                    loss_fn_weight = loss_fn_weight_dataset
                    loss_nli_weight = loss_nli_weight_dataset

                # define the total loss function
                #loss_func = loss_func_fn + loss_func_nli
                # is this needed?

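                # draw the next batch from FNN with probability chance_fn / 1000, otherwise from SNLI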
                if np.random.randint(0, 1000) < chance_fn:
                    try:
                        step_fnn, batch_fnn = next(iterator_fnn)
                    except StopIteration:
                        done_fnn = True
                    else:
                        try:
                            batch_loss_fn, batch_acc_fn = train_batch_fn(
                                batch_fnn, model, optimizer, loss_func_fn,
                                loss_fn_weight)
                            train_loss_fn.append(batch_loss_fn)
                            train_acc_fn.append(batch_acc_fn)
                        except Exception as err:
                            print(f'Error in FNN batch: {err}')
                else:
                    try:
                        step_snli, batch_snli = next(iterator_snli)
                    except StopIteration:
                        done_snli = True
                    else:
                        try:
                            batch_loss_nli, batch_acc_nli = train_batch_nli(
                                batch_snli, model, optimizer, loss_func_nli,
                                loss_nli_weight)
                            train_loss_nli.append(batch_loss_nli)
                            train_acc_nli.append(batch_acc_nli)
                        except Exception as err:
                            print(f'Error in SNLI batch: {err}')
                print(f'FNN batch {step_fnn}')
                print(f'SNLI batch {step_snli}')
                if step_fnn % 50 == 0 and step_fnn != 0:
                    print(f'Processed {step_fnn} FNN batches.')
                    print(f'Accuracy: {train_acc_fn[len(train_acc_fn)-1]}.')
                    print(
                        f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.'
                    )
                if step_snli % 50 == 0 and step_snli != 0:
                    print(f'Processed {step_snli} SNLI batches.')
                    print(f'Accuracy: {train_acc_nli[len(train_acc_nli)-1]}.')
                    print(
                        f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.'
                    )
        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model,
                                                loss_func_fn)
        tasks = ['fn']
        if model_type == 'MTL':
            val_loss_nli, val_acc_nli = eval_epoch_nli(SNLI_DL['val'], model,
                                                       loss_func_nli)
            tasks.append('nli')

        for task in tasks:
            results[task]['epoch'].append(i)
            if task == 'fn':
                temp_train_loss = train_loss_fn
                temp_val_loss = val_loss_fn
                temp_train_acc = train_acc_fn
                temp_val_acc = val_acc_fn
            elif task == 'nli':
                temp_train_loss = train_loss_nli
                temp_val_loss = val_loss_nli
                temp_train_acc = train_acc_nli
                temp_val_acc = val_acc_nli

            results[task]['train_loss'].append(temp_train_loss)
            results[task]['train_accuracy'].append(temp_train_acc)
            results[task]['val_loss'].append(temp_val_loss)
            results[task]['val_accuracy'].append(temp_val_acc)
            print(results)

        best_accuracy = torch.tensor(temp_val_acc).max().item()
        create_checkpoint(checkpoints_dir, i, model, optimizer, results,
                          best_accuracy)

    # save and plot the results
    save_results(results_dir, results, model)
    save_model(models_dir, model)
    plot_results(results_dir, results, model)
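
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original example): a minimal, self-
# contained restatement of the loss-weighting rule used in the MTL branch of
# train() above. The helper and argument names are hypothetical; the arithmetic
# simply mirrors what the training loop computes inline.
import math


def sketch_task_loss_weights(loss_hist_fn, loss_hist_nli,
                             weight_fn_dataset, weight_nli_dataset,
                             temperature=2.0):
    """Return (loss_fn_weight, loss_nli_weight) as computed in train()."""
    if len(loss_hist_fn) < 2 or len(loss_hist_nli) < 2:
        # until both tasks have at least two recorded losses,
        # fall back to the dataset-size priors
        return weight_fn_dataset, weight_nli_dataset
    # ratio of the two most recent losses per task, softened by the temperature
    exp_fn = math.exp((loss_hist_fn[-1] / loss_hist_fn[-2]) / temperature)
    exp_nli = math.exp((loss_hist_nli[-1] / loss_hist_nli[-2]) / temperature)
    # normalise across the two tasks (the GradNorm-style term)
    gn_fn = exp_fn / (exp_fn + exp_nli)
    gn_nli = exp_nli / (exp_fn + exp_nli)
    # combine with the dataset-size priors through a second softmax
    num_fn = math.exp(weight_fn_dataset * gn_fn)
    num_nli = math.exp(weight_nli_dataset * gn_nli)
    return num_fn / (num_fn + num_nli), num_nli / (num_fn + num_nli)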
Beispiel #26
0
# <parte>:            'part2' for part 2; for part 1, just '' (two apostrophes)

# Example:
# executions = [
#     # ('HOG', '32', '')               <- HOG, 32 neurons, part 1
#     # ('LBP', '160', 'part2')    <- LBP, 160 neurons, part 2
# ]
# The number of test tuples is unlimited, so add as many as you need.
# Once all cases are defined, run in the terminal: python3 src/run.py

executions = [('HOG', '32', ''), ('LBP', '160', '')]

if __name__ == '__main__':
    start = datetime.now()

    for run_num, e in enumerate(executions):
        directory = 'output/{desc}-N{hn:03}-P{part}-{datetime}/'.format(
            desc=e[0],
            hn=int(e[1]),
            part=2 if 'part2' in e else 1,
            datetime=start.strftime('%Y-%m-%d-%H-%M'))

        u.create_directories(['output', directory])
        command = 'python3.6 src/cross-validation.py '
        command += '{desc} {neurons:3} {part:5} {directory} > {directory}log.txt &'.format(
            desc=e[0], neurons=e[1], part=e[2], directory=directory)

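        # launch cross-validation for this configuration in the background,
        # redirecting its output to the run's log file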
        os.system(command)
        print('{}. Running: {}'.format(str(run_num + 1).zfill(2), command))
def test():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    epoch_no = FLAGS.epoch
    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embeddings
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn data
    keys = ['train', 'test', 'val']
    FNN_DL_small = {}
    for i in keys:
        FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'),
                              GloVe_vectors, ELMo)
        FNN_DL_temp = data.DataLoader(dataset=FNN_temp,
                                      batch_size=BATCH_SIZE_FN,
                                      num_workers=0,
                                      shuffle=True,
                                      drop_last=True,
                                      collate_fn=PadSortBatchFNN())
        FNN_DL_small[i] = FNN_DL_temp
    print('Uploaded FNN data.')

    print('Initializing the model...', end=' ')

    model = initialize_han(input_dim, WORD_HIDDEN_DIM, NUM_CLASSES_FN, DEVICE)

    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    print('Loading model weights.')
    #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt'))
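    # epoch 0 loads the final saved model; any other value loads that epoch's checkpoint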
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        model = load_model(model_path, model, checkpoint=False)
        #_, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        model = load_model(checkpoint_path, model, checkpoint=True)
        #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer)

    #model.eval()
    loss_func_fn = nn.CrossEntropyLoss()
    #y_pred = []
    #y_true = []
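    # run every split through the trained encoder and cache the article embeddings to disk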
    for split in keys:
        all_embeds = []
        for step, batch in enumerate(FNN_DL_small[split]):
            embeds = get_article_embeddings(model, batch)
            all_embeds.append(embeds[0])
        pkl.dump(
            all_embeds,
            open(
                data_dir /
                ('FNN_small_embeds_' + model_type + '_' + split + '.pkl'),
                'wb'))
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embeddings
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn data
    FNN = {}
    FNN_DL = {}

    for path in ['train', 'val', 'test']:
        FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'),
                               GloVe_vectors, ELMo)
        FNN_DL[path] = data.DataLoader(dataset=FNN[path],
                                       batch_size=BATCH_SIZE_FN,
                                       num_workers=0,
                                       shuffle=True,
                                       drop_last=True,
                                       collate_fn=PadSortBatchFNN())
    print('Uploaded FNN data.')

    fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl')
    fnn_train_len = len(FNN['train'])

    # initialize the model, according to the model type
    print('Initializing the model...', end=' ')

    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     sent_hidden_dim=sent_hidden_dim,
                                     doc_hidden_dim=doc_hidden_dim,
                                     num_classes=NUM_CLASSES_FN,
                                     dropout=0).to(DEVICE)
    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    # set the criterion and optimizer
    # weight the loss inversely to class frequency (class [0] is real, class [1] is fake)
    # so the under-represented class contributes proportionally more
    # (e.g. a 0.8/0.2 real-to-fake split yields weights [0.2, 0.8])
    real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl')
    weights = [(1.0 - real_ratio), (1.0 - fake_ratio)]
    print(weights)
    class_weights = torch.FloatTensor(weights).to(DEVICE)
    loss_func_fn = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    # load the last checkpoint (if it exists)
    results = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    epoch, results, best_accuracy = load_latest_checkpoint(
        checkpoints_dir, model, optimizer)
    if epoch == 0:
        print(f'Starting training at epoch {epoch + 1}...')
    else:
        print(f'Resuming training from epoch {epoch + 1}...')

    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL['train'], model,
                                                     optimizer, loss_func_fn)

        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model,
                                                loss_func_fn)

        results['epoch'].append(i)
        results['train_loss'].append(train_loss_fn)
        results['train_accuracy'].append(train_acc_fn)
        results['val_loss'].append(val_loss_fn)
        results['val_accuracy'].append(val_acc_fn)
        #print(results)
        best_accuracy = torch.tensor(val_acc_fn).max().item()
        create_checkpoint(checkpoints_dir, i, model, optimizer, results,
                          best_accuracy)
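        # simple step decay: halve the learning rate every 4 epochs
        # (re-creating the optimizer also resets Adam's moment estimates)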
        if (i + 1) % 4 == 0 and i != 0:
            learning_rate = learning_rate / 2
            optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

    # save and plot the results
    save_results(results_dir, results, model)
    save_model(models_dir, model)
Beispiel #29
0
                filename = name + "/" + "{}_{}_{}.jpg".format(
                    name, datetime_format, count).lower()
                filename_list.append(filename)
                utils.save_cv2_image(IMAGE_ORIGINAL_DIR, filename, frame)
                count += 1

            frame_no += 1

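    # push the newly saved frames to OCI Object Storage from a background thread
    # so the HTTP response below is not blocked by the upload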
    if OCI_STORAGE_SYNC:
        pool = multiprocessing.pool.ThreadPool(processes=3)
        pool.apply_async(oci_utils.upload_to_object_storage,
                         args=[
                             config, IMAGE_ORIGINAL_DIR,
                             OCI_STORAGE_BUCKET_NAME, filename_list
                         ])
        pool.close()

    return json.dumps({'result': 'success', 'message': 'File uploaded!'})


print("facerec_service loaded!")
utils.create_directories(
    [IMAGE_UPLOAD_DIR, IMAGE_ORIGINAL_DIR, IMAGE_PREPARED_DIR])
config = oci.config.from_file(OCI_CONFIG_PATH, "DEFAULT")

if OCI_STORAGE_SYNC:
    oci_utils.syncronize_with_object_storage(config, IMAGE_ORIGINAL_DIR,
                                             OCI_STORAGE_BUCKET_NAME)

face_recognition = face.Recognition()
#train()
Beispiel #30
0
    print(score)

    if save_model:
        print("Saving model")
        model.save(final_model)

    if predictions:
        print("Running predictions on test data")
        test_data = np.load(f"{config['Paths']['ising data']}/test_data.npz")
        test_data = test_data["data"]
        predictions = model.predict(test_data)
        prediction_name = config['Paths'][
            'predictions'] + "/predictions_" + str(
                model_iteration) + "_" + str(model_type) + ".npy"
        np.save(prediction_name, predictions)


print("Training Network")
create_directories()
predictions = cnn_regressor(model_iteration=argv[2],
                            model_type=argv[3],
                            activation=argv[4],
                            optimizer=argv[5],
                            dropout=argv[6],
                            batchnorm=argv[7],
                            batchnorm_order=argv[8],
                            batch_size=argv[9],
                            learning_rate=argv[10],
                            training_data_length=argv[11],
                            shallow_network=argv[12])
Beispiel #31
0
AUTOTUNE =  tf.data.experimental.AUTOTUNE

# download the flower_photos dataset / if you already have the directory, you can comment out this line
datadir = keras.utils.get_file(origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',fname='flower_photos',untar=True)
datadir = pathlib.Path(datadir)

# count the flower images / if you already have the directory, you can comment out this line
image_count = len(list(datadir.glob("*/*.jpg")))
print("Numero de Imagens : {}".format(image_count))

# get class names / if you already have the directory, you can comment out this line
CLASS_NAME = np.array([item.name for item in datadir.glob('*') if item.name != 'LICENSE.txt'])
print(CLASS_NAME)

# create directories for each flower class / if you already have the directory, you can comment out this line
utl.create_directories()

# create train and test folders / if you already have the directories (train, test), you can comment out this line
utl.create_test_train_folder(datadir,CLASS_NAME)

# define the model
model = iam.define_model()

# run model
iam.run(model)

# get rose images for the prediction example
roses = list(datadir.glob('roses/*'))

# run prediction
pred.run_example(roses[2])
                                   training_this_round, testing_this_round,
                                   fold_i))
        thread_list.append(t)

    # Start the threads
    for thread in thread_list:
        thread.start()

    for thread in thread_list:
        thread.join()


# start of execution
if __name__ == "__main__":
    # execution start time, descriptor, parameters, directories, class list and dataset list
    start_algorithm = datetime.now()
    arguments = get_arguments()
    parameters = p.get_parameters(arguments['descriptor'], 'part2' in sys.argv,
                                  arguments['neurons'], arguments['output'])
    print(parameters)
    u.create_directories(['data', 'src', 'output'])
    dataset = u.get_dataset_list(u.get_classes_list(parameters['workpath']),
                                 parameters['workpath'])
    k_fold(dataset, len(dataset), parameters, start_algorithm)
    print("Main Start Time:   \t\t\t\t\t\t{}".format(
        start_algorithm.strftime("%Y-%m-%d %H:%M:%S")))
    print("Main End Time:      \t\t\t\t\t\t{}".format(
        datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
    print("Total time running: \t\t\t\t\t\t{}\n".format(datetime.now() -
                                                        start_algorithm))
Beispiel #33
0
    features, act_labels, verbose=True)

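# segment the raw time series into overlapping windows of 200 samples, advancing 10 samples at a time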
train_data, act_train_labels = train_loader.time_series_to_section(
    train_ts.copy(),
    num_act_labels,
    sliding_window_size=200,
    step_size_of_sliding_window=10)

test_data, act_test_labels = train_loader.time_series_to_section(
    test_ts.copy(),
    num_act_labels,
    sliding_window_size=200,
    step_size_of_sliding_window=10)

print("---Data is successfully loaded")
handler = DataHandler(train_data, test_data)
norm_train = handler.normalise("train")
norm_test = handler.normalise("test")

print("--- Shape of Training Data:", train_data.shape)
print("--- Shape of Test Data:", test_data.shape)

expt_name = "thurs_Script_jog2"

create_directories(expt_name)
gan_ = GAN(norm_train.shape)
trainer_ = Trainer(gan_, expt_name)
trainer_.train_gan(epochs=200,
                   batch_size=128,
                   sample_interval=10,
                   train_data=norm_train)
Beispiel #34
0
    """

    print(params['feature'])

    train_x, train_y = data_loader.load_combined_data(params['feature'],
                                                      'train')
    valid_x, valid_y = data_loader.load_combined_data(params['feature'],
                                                      'valid')

    params['name'] = utils.create_name(params)
    model = train_model(feature, params, train_x, train_y, valid_x, valid_y)

    test_x, test_y = data_loader.load_combined_data(params['feature'], 'test')
    print('Calculating performance on test set')
    print('AUC', test_model(model, test_x, test_y, params))


if __name__ == "__main__":
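    # train and evaluate one model per feature combination, each with its own
    # output directories and tuned hyper-parameters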
    for feature in [
            'hl',
            'et_and_ht',
            'et',
            'ht',
            'et_and_ht_and_hl',
            'mass',
            'hl_and_mass',
    ]:
        utils.create_directories(feature)
        params = utils.get_optimal_params(feature)
        main(feature, params)