Code example #1
    def __init__(self,args,mode='train'):
        if mode == 'train':
            self.sql_data, self.table_data, self.val_sql_data, self.val_table_data = utils.load_dataset(args.dataset, use_small=args.use_small)
        else:
            self.val_sql_data, self.val_table_data = utils.load_dataset(args.dataset, use_small=args.use_small,mode=mode)

        if args.print_info:
            if mode == 'train':
                print('Train sql_data size:{},train table numbers:{}\n'
                    'Dev sql_data size:{},dev table numbers:{}'.format(len(self.sql_data), len(self.table_data), len(self.val_sql_data),
                                                                     len(self.val_table_data)))
            else:
                print('Dev sql_data size:{},dev table numbers:{}'.format(len(self.val_sql_data),
                                                                         len(self.val_table_data)))
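The snippet above is the __init__ of a dataset wrapper whose class line is not shown. A minimal sketch of how it might be constructed, assuming argparse flags named after the attributes the code reads (dataset, use_small, print_info); the class name is a placeholder, not part of the original:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='./data')        # path passed to utils.load_dataset
parser.add_argument('--use_small', action='store_true')   # load the small debug split
parser.add_argument('--print_info', action='store_true')  # print dataset sizes after loading
args = parser.parse_args()
# loader = SomeLoaderClass(args, mode='train')  # placeholder name; the class line is omitted above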
Code example #2
def main():
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    logger = logging.getLogger(__name__)
    # logger.info(input_filepath, output_filepath)
    raw = home / 'data' / 'raw'
    raw = load_dataset(raw)

    dija = raw['DJIA_table_train'].copy()
    expected_labels = generate_labels(dija)
    dija_labels = expected_labels.loc[:, 'label'].to_frame()
    dija_labels.to_csv(home / 'data' / 'interim' / 'dija-labels.csv')

    comb = raw['Combined_News_DJIA_train'].copy()

    news_cols = [c for c in comb.columns if 'Top' in c]

    for name in news_cols:
        col = comb.loc[:, name]
        col = col.fillna(' ')
        col = col.apply(lambda x: x.strip('b'))
        col = col.apply(lambda x: x.strip('"'))
        col = col.apply(lambda x: x.strip("'"))
        comb.loc[:, name] = col

    comb.loc[:, 'Label'] = comb.loc[:, 'Label'].fillna(dija_labels.loc[:, 'label'])
    assert sum(comb.loc[:, 'Label'].isnull()) == 0
    comb.iloc[:, 1:] = comb.iloc[:, 1:].fillna(" ")
    print('saving combined to data/interim')
    comb.to_csv(home / 'data' / 'interim' / 'combined.csv', index=True)
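The per-column cleanup above (filling missing headlines, then stripping the leading b of byte-string literals and the surrounding quotes) can be factored into one helper. A minimal sketch with a hypothetical clean_headline function; the behavior mirrors the loop above:

def clean_headline(text):
    # Remove the b'...' / b"..." wrapper left by scraped byte-string headlines.
    return text.strip('b').strip('"').strip("'")

# for name in news_cols:
#     comb[name] = comb[name].fillna(' ').apply(clean_headline)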
Code example #3
File: eval.py Project: bxclib2/IRNet-1
def evaluate(args):
    """
    :param args:
    :return:
    """

    grammar = semQL.Grammar()
    sql_data, table_data, val_sql_data,\
        val_table_data = utils.load_dataset(args.dataset, use_small=args.toy)

    model = IRNet(args, grammar)

    if args.cuda: model.cuda()

    print('load pretrained model from %s'% (args.load_model))
    pretrained_model = torch.load(args.load_model,
                                     map_location=lambda storage, loc: storage)
    import copy
    pretrained_modeled = copy.deepcopy(pretrained_model)
    for k in pretrained_model.keys():
        if k not in model.state_dict().keys():
            del pretrained_modeled[k]

    model.load_state_dict(pretrained_modeled)

    model.word_emb = utils.load_word_emb(args.glove_embed_path)

    json_datas, sketch_acc, acc = utils.epoch_acc(model, args.batch_size, val_sql_data, val_table_data,
                           beam_size=args.beam_size)
    print('Sketch Acc: %f, Acc: %f' % (sketch_acc, acc))
    # utils.eval_acc(json_datas, val_sql_data)
    import json
    with open('./predict_lf.json', 'w') as f:
        json.dump(json_datas, f)
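The checkpoint handling above (drop checkpoint keys that the current model does not define, then load the rest) also appears in code example #13. A minimal standalone sketch of the same pattern, using a hypothetical helper name load_matching_weights:

import torch

def load_matching_weights(model, checkpoint_path):
    # Load the checkpoint onto the CPU regardless of where it was saved.
    state = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    # Keep only entries whose keys exist in the current model.
    wanted = set(model.state_dict().keys())
    filtered = {k: v for k, v in state.items() if k in wanted}
    # strict=False tolerates model parameters that the checkpoint does not cover.
    model.load_state_dict(filtered, strict=False)
    return model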
Code example #4
def run_split(split):
    print(split)
    if not TRIAL_RUN:
        args.dataset = split
        sql_data, table_data, val_sql_data,\
            val_table_data = utils.load_dataset(args.dataset, use_small=args.toy)
        json_datas, sketch_acc, acc = utils.epoch_acc(model,
                                                      args.batch_size,
                                                      val_sql_data,
                                                      val_table_data,
                                                      beam_size=args.beam_size)
        print('Sketch Acc: %f, Acc: %f' % (sketch_acc, acc))
        with open(os.path.join(split, 'predict_lf.json'), 'w') as f:
            json.dump(json_datas, f)
        subprocess.run([
            "python", "./sem2SQL.py", "--data_path", split, "--input_path",
            os.path.join(split, 'predict_lf.json'), "--output_path",
            os.path.join(split, 'output.txt')
        ],
                       cwd="/IRNet")
    else:
        print("Trial run")
        with open(os.path.join(split, 'output.txt'), 'w') as f:
            f.write('trial run\n')
        with open(os.path.join(split, 'predict_lf.json'), 'w') as f:
            json.dump({'trial': 'run'}, f)
    results = {}
    with open(os.path.join(split, 'output.txt'), 'r') as f:
        results["sql"] = f.read().strip()
    with open(os.path.join(split, 'predict_lf.json'), 'r') as f:
        results["interpretation"] = json.load(f)
    message = {"split": split, "result": results}
    return message
Code example #5
def main():
    print("running main ...")
    utils.update_dataset()
    train_x, train_y, test_x, test_y = utils.load_dataset()
    # utils.check_data(train_x, train_y, 5)
    # utils.check_data(test_x, test_y, 5)
    networks.train_model(train_x, train_y, test_x, test_y)
Code example #6
    def __init__(self, args):
        """
        :param args: argument object
        """
        self.args = args
        self.dataset = load_dataset(self.args.dataset_folder,
                                    self.args.dataset_name)
        self.data = self.dataset[0]
        self.load_to_device()
Code example #7
def main():
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    logger = logging.getLogger(__name__)
    # logger.info(input_filepath, output_filepath)

    from src.utils import load_dataset, home

    raw = home / 'data' / 'interim'
    raw = load_dataset(raw)
    comb = raw['combined']
    process_combined(comb, 'processed')
Code example #8
def main():

    f = load_dataset(home / 'data' / 'processed')
    x_tr, y_tr, x_te, y_te = f['x_tr'], f['y_tr'], f['x_te'], f['y_te']

    mdl = RandomForestClassifier(**rf_params)
    mdl, res = fit(mdl, x_tr, y_tr, x_te, y_te)

    print('fitting final model')
    x = pd.concat([x_tr, x_te], axis=0)
    y = pd.concat([y_tr, y_te], axis=0)

    mdl, res = fit(mdl, x, y, x, y)
    from joblib import dump
    dump(mdl, home / 'models' / 'final.joblib')
Code example #9
def main():

    mdl = load(home / 'models' / 'final.joblib')
    dataset = load_dataset(home / 'data' / 'holdout')

    comb = dataset['Combined_News_DJIA_test']
    f = process_combined(comb, 'holdout')

    x_tr, y_tr, x_te, y_te = f['x_tr'], f['y_tr'], f['x_te'], f['y_te']

    x = pd.concat([x_tr, x_te], axis=0)
    y = pd.concat([y_tr, y_te], axis=0)

    print('holdout x shape {}'.format(x.shape))

    acc = mdl.score(x, y)
    print('final model accuracy on holdout {}'.format(acc))
Code example #10
def builtin_train(args):
    # 1. load dataset and model
    (train_images, train_labels), (test_images,
                                   test_labels) = load_dataset(args.data)
    input_shape = train_images[:args.batch_size, :, :, :].shape
    output_size = max(train_labels) + 1
    model = load_model(args.arch,
                       input_shape=input_shape,
                       output_size=output_size)
    model.summary()

    # 2. set tensorboard cofigs
    logdir = os.path.join(args.logdir, get_current_time())
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

    # 3. loss, optimizer, metrics setting
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )

    # 4. dataset config
    buffer_size = len(train_images)
    train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    train_ds = train_ds.shuffle(buffer_size)
    if args.augmentation:
        train_ds = train_ds.map(augment)
    train_ds = train_ds.batch(args.batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    test_ds = test_ds.batch(args.batch_size)

    fit_params = {}
    # batch_size must not be passed to fit() when the input is a tf.data.Dataset
    # (batching is already done by train_ds.batch above), so it is omitted here.
    fit_params["epochs"] = args.max_epoch
    if args.steps_per_epoch:
        fit_params["steps_per_epoch"] = args.steps_per_epoch
    fit_params["verbose"] = 1
    fit_params["shuffle"] = True
    fit_params["callbacks"] = [tensorboard_callback]
    fit_params["validation_data"] = test_ds

    # 5. start train and test
    model.fit(train_ds, **fit_params)
Code example #11
def fit(x_train,
        y_train,
        batch_size,
        device,
        model_name='densenet121',
        opt='Adagrad',
        dataset='iris',
        writer=None,
        label_col_name=''):
    # train_loader, val_loader, test_loader = load_data(dataset, label_col_name=label_col_name)
    train_loader, nb_classes = load_dataset(x_train, y_train, batch_size,
                                            device)

    # Model selection
    model = load_model(model_name, nb_classes=nb_classes)

    # Optimizer
    optimizer = opt_selection(model, opt)

    # Loss Criterion
    criterion = nn.CrossEntropyLoss()

    best_train = 0.0
    for epoch in range(1, args.epochs + 1):
        # Train and Validate
        train_stats = train_step(model, criterion, optimizer, train_loader)
        # Logging
        logging(epoch, train_stats, writer)

        # Keep best model
        if train_stats['accuracy'] >= best_train:
            best_train = train_stats['accuracy']
            best_model_weights = copy.deepcopy(model.state_dict())

    # Load best model and evaluate on test set
    model.load_state_dict(best_model_weights)

    # print('\nBests Model Accuracies: Train: {:4.2f} | Val: {:4.2f} | Test: {:4.2f}'.format(best_train, best_val, test_stats['accuracy']))
    print('\nBest Model Accuracy: Train: {:4.2f}'.format(best_train))

    return model
Code example #12
File: train.py Project: jiapyliu/IRNet
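# (Excerpt) The except/else tail of the training loop in train(); the full function appears in code example #13.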
    except Exception as e:
        # Save model
        utils.save_checkpoint(model,
                              os.path.join(model_save_path, 'end_model.model'))
        print(e)
        tb = traceback.format_exc()
        print(tb)
    else:
        utils.save_checkpoint(model,
                              os.path.join(model_save_path, 'end_model.model'))
        json_datas, sketch_acc, acc = utils.epoch_acc(model,
                                                      args.batch_size,
                                                      val_sql_data,
                                                      val_table_data,
                                                      beam_size=args.beam_size)
        # acc = utils.eval_acc(json_datas, val_sql_data)

        print("Sketch Acc: %f, Acc: %f, Beam Acc: %f" % (
            sketch_acc,
            acc,
            acc,
        ))


if __name__ == '__main__':
    # arg_parser = arg.init_arg_parser()
    # args = arg.init_config(arg_parser)
    # print(args)
    # train(args)
    val_table_data = utils.load_dataset('./data')
Code example #13
def train(args):
    """
    :param args:
    :return:
    """
    grammar = semQL.Grammar()
    sql_data, table_data, val_sql_data, val_table_data = utils.load_dataset(
        args.dataset, use_small=args.toy)

    model = IRNet(args, grammar)
    if args.cuda: model.cuda()

    # now get the optimizer
    optimizer_cls = eval('torch.optim.%s' % args.optimizer)
    optimizer = optimizer_cls(model.parameters(), lr=args.lr)
    print('Enable Learning Rate Scheduler: ', args.lr_scheduler)
    if args.lr_scheduler:
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[21, 41], gamma=args.lr_scheduler_gammar)
    else:
        scheduler = None

    print('Loss epoch threshold: %d' % args.loss_epoch_threshold)
    print('Sketch loss coefficient: %f' % args.sketch_loss_coefficient)

    if args.load_model:
        print('load pretrained model from %s' % (args.load_model))
        pretrained_model = torch.load(
            args.load_model, map_location=lambda storage, loc: storage)
        pretrained_modeled = copy.deepcopy(pretrained_model)
        for k in pretrained_model.keys():
            if k not in model.state_dict().keys():
                del pretrained_modeled[k]

        model.load_state_dict(pretrained_modeled)

    model.word_emb = utils.load_word_emb(args.glove_embed_path)
    # begin train

    model_save_path = utils.init_log_checkpoint_path(args)
    utils.save_args(args, os.path.join(model_save_path, 'config.json'))
    best_dev_acc = .0

    try:
        with open(os.path.join(model_save_path, 'epoch.log'), 'w') as epoch_fd:
            for epoch in tqdm.tqdm(range(args.epoch)):
                if args.lr_scheduler:
                    scheduler.step()
                epoch_begin = time.time()
                loss = utils.epoch_train(
                    model,
                    optimizer,
                    args.batch_size,
                    sql_data,
                    table_data,
                    args,
                    loss_epoch_threshold=args.loss_epoch_threshold,
                    sketch_loss_coefficient=args.sketch_loss_coefficient)
                epoch_end = time.time()
                json_datas, sketch_acc, acc, counts, corrects = utils.epoch_acc(
                    model,
                    args.batch_size,
                    val_sql_data,
                    val_table_data,
                    beam_size=args.beam_size)
                # acc = utils.eval_acc(json_datas, val_sql_data)

                if acc > best_dev_acc:
                    utils.save_checkpoint(
                        model, os.path.join(model_save_path,
                                            'best_model.model'))
                    best_dev_acc = acc
                utils.save_checkpoint(
                    model,
                    os.path.join(model_save_path,
                                 '{%s}_{%s}.model' % (epoch, acc)))

                log_str = 'Epoch: %d, Loss: %f, Sketch Acc: %f, Acc: %f, time: %f\n' % (
                    epoch + 1, loss, sketch_acc, acc, epoch_end - epoch_begin)
                tqdm.tqdm.write(log_str)
                epoch_fd.write(log_str)
                epoch_fd.flush()
    except Exception as e:
        # Save model
        utils.save_checkpoint(model,
                              os.path.join(model_save_path, 'end_model.model'))
        print(e)
        tb = traceback.format_exc()
        print(tb)
    else:
        utils.save_checkpoint(model,
                              os.path.join(model_save_path, 'end_model.model'))
        json_datas, sketch_acc, acc, counts, corrects = utils.epoch_acc(
            model,
            args.batch_size,
            val_sql_data,
            val_table_data,
            beam_size=args.beam_size)
        # acc = utils.eval_acc(json_datas, val_sql_data)

        print("Sketch Acc: %f, Acc: %f, Beam Acc: %f" % (
            sketch_acc,
            acc,
            acc,
        ))
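The optimizer lookup via eval('torch.optim.%s' % args.optimizer) above can be expressed without eval. A minimal equivalent sketch using getattr, assuming args.optimizer names a class in torch.optim (e.g. 'Adam'); args and model are the objects from the example above:

import torch

optimizer_cls = getattr(torch.optim, args.optimizer)   # e.g. 'Adam' -> torch.optim.Adam
optimizer = optimizer_cls(model.parameters(), lr=args.lr)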
Code example #14
def main():
    utils.load_dataset()
    utils.generate_mask()
Code example #15
# --------------------------------------------------------------------------------------------------- SETTINGS
args = get_args(__file__, options="bcdefrsvz")

v_print = get_verbose_print(args.verbose)

comp_params = {
    "loss": 'categorical_crossentropy',
    "optimizer": 'adam',
    "metrics": ['accuracy']
}

# --------------------------------------------------------------------------------------------- GET CLASSIFIER

# Get dataset
(X_train, Y_train), (X_test, Y_test), _, _ = load_dataset(args.dataset)

if os.path.isfile(args.dataset):
    X_train = np.load(args.dataset)
    Y_train = Y_train if "train.npy" in args.dataset else Y_test
im_shape = X_train[0].shape

session = tf.Session()
k.set_session(session)

if args.classifier == "cnn":
    classifier = CNN(im_shape,
                     act=args.act,
                     bnorm=False,
                     defences=args.defences,
                     dataset=args.dataset)
Code example #16
from src.attacks.deepfool import DeepFool
from src.attacks.fast_gradient import FastGradientMethod
from src.attacks.saliency_map import SaliencyMapMethod
from src.attacks.universal_perturbation import UniversalPerturbation
from src.attacks.virtual_adversarial import VirtualAdversarialMethod
from src.classifiers.utils import load_classifier

from src.utils import get_args, get_verbose_print, load_dataset, make_directory

# --------------------------------------------------------------------------------------------------- SETTINGS
args = get_args(__file__, load_classifier=True, options="adsv")
v_print = get_verbose_print(args.verbose)
alpha = 0.05  # constant for random perturbation

# get dataset
(X_train, Y_train), (X_test, Y_test), min_, max_ = load_dataset(args.dataset)

session = tf.Session()
k.set_session(session)

# Load classification model
MODEL_PATH = os.path.join(os.path.abspath(args.load), "")
classifier = load_classifier(MODEL_PATH, "best-weights.h5")

if args.save:
    SAVE_ADV = os.path.join(os.path.abspath(args.save), args.adv_method)
    make_directory(SAVE_ADV)

    with open(os.path.join(SAVE_ADV, "readme.txt"), "w") as wfile:
        wfile.write("Model used for crafting the adversarial examples is in " +
                    MODEL_PATH)
Code example #17
def preproc_routines(p, prefix):

    draw_line = "=" * 79
    logging.info(f"Reading data from file\n{draw_line}")
    data = load_dataset(p['data_filepath'], chunk=True)
    uf_filepath = 'data/UF.csv'
    uf_data = load_dataset(p['uf_filepath'], chunk=False)
    ncm_filepath = 'data/NCM.csv'
    ncm_data = load_dataset(p['ncm_filepath'], chunk=False)
    country_filepath = 'data/PAIS.csv'
    country_data = load_dataset(p['country_filepath'], chunk=False)

    logging.info(f"""Filtering data to keep only 
                 {p['year_attribute']}:{p['years_to_keep']}\n{draw_line}""")
    data = Preproc.filter_values(data, p['year_attribute'], p['years_to_keep'])

    preproc = Preproc(data, p['columns_to_drop'], p['uf_drop_list'])
    preproc.apply_general_preproc(uf_data, ncm_data, country_data,
                                  p['textual_cut_point'])

    grouped_by_year = preproc.get_top_products_by_year(p['top_n'])
    logging.info(f"""Top {p['top_n']} {prefix} by UF each year
        \n{grouped_by_year.to_markdown()}\n{draw_line}""")

    grouped_by_month = preproc.get_top_products_by_month(p['year'], p['top_n'])
    logging.info(f"""Top {p['top_n']} {prefix} by UF each {p['year']}'s month
                  \n{grouped_by_month.to_markdown()}\n{draw_line}""")

    grouped_values_by_uf = preproc.get_summed_values_by_uf(p['year'])
    logging.info(f"""{prefix} values per UF in {p['year']}\n
                {grouped_values_by_uf.to_markdown()}\n{draw_line}""")

    grouped_uf = preproc.get_top_products_by_month_one_uf(
        p['keep_uf'], p['top_n'])
    logging.info(
        f"""{prefix} {p['keep_uf']} top {p['top_n']} values per month\n
                 {grouped_uf.to_markdown()}\n{draw_line}""")

    logging.info(f"Plotting data \n{draw_line}")
    ExploratoryAnalysis.plot_data(grouped_by_year, grouped_by_month,
                                  grouped_values_by_uf, p['top_n'], p['year'],
                                  prefix)

    logging.info(
        f"""Keep only top {p['top_n']} data per month from UF {p['keep_uf']}
                  to predict their values\n{draw_line}""")
    preproc.keep_top_data_to_predict(grouped_uf, p['keep_uf'])

    logging.info(f"Data exploration\n{draw_line}")
    data_exploration(preproc.data)

    logging.info(f"Remove empty 'VL_FOB' from data\n{draw_line}")
    preproc.drop_zero_vlfob()

    logging.info(
        f"Transforming target variable 'VL_FOB' into log\n{draw_line}")
    preproc.target_log()

    logging.info("Data exploration after preprocessing steps\n{draw_line}")
    data_exploration(preproc.data)

    return preproc.data
Code example #18
File: runtest.py Project: tianhaoz95/memeron
def test_load_dataset():
    train_x, train_y, test_x, test_y = utils.load_dataset()
Code example #19
File: main.py Project: luanps/churn_analysis
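# (Excerpt) Tail of a data_exploration() helper followed by the script entry point.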
    print(f"""Attributes with null data:\n {exploratory.check_null()}""")

    empty_spaces = exploratory.check_empty_spaces()
    print(
        f"""Textual attributes with empty data (value=' '):\n{empty_spaces}""")

    unique_values = exploratory.check_unique_values(10)
    print(f"""Sample of each attribute:""")
    [print(key, value) for key, value in unique_values.items()]
    return empty_spaces


if __name__ == '__main__':
    print("============= Reading data from file  =============")
    filepath = 'data/database.csv'
    data = load_dataset(filepath)

    print("============= Data exploration  =============")
    empty_spaces = data_exploration(data)

    print("============= Applying Preprocessing step =============")
    columns_to_drop = ['customerID', 'code', 'Hash']
    preproc = Preproc(data, columns_to_drop, empty_spaces)
    treated_data = preproc.apply_preproc()

    print("============= Data exploration after preprocessing  =============")
    data_exploration(treated_data)

    print("============= Plotting data  =============")
    ExploratoryAnalysis.plot_data(treated_data)
Code example #20
import time
import math
import tensorflow as tf
import src.utils as utils

NUM_CLASSES = 2
IMAGE_SIZE = 28
CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNELS
LABEL_IMAGES_COUNT = 0  # 0 for no restrictions

# Get the sets of images and labels for training, validation, and test.
train_images, train_labels = utils.load_dataset(NUM_CLASSES,
                                                IMAGE_SIZE,
                                                LABEL_IMAGES_COUNT,
                                                is_reshape=True)
BATCH_SIZE = len(train_images)
train_images = train_images.reshape(BATCH_SIZE, IMAGE_PIXELS)

# Basic model parameters as external flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 2000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_integer(
    'batch_size', BATCH_SIZE, 'Batch size.  '
    'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', 'data', 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
"""Trains a convolutional neural network on the CIFAR10 dataset with feature squeezing as a defense.

Gets to 70.04% test accuracy after 10 epochs.
"""
from __future__ import absolute_import, division, print_function

from os.path import abspath
import sys
sys.path.append(abspath('.'))
from config import config_dict

from src.classifiers.cnn import CNN
from src.utils import load_dataset

# Read CIFAR10 dataset
(x_train, y_train), (x_test, y_test), _, _ = load_dataset('cifar10')
im_shape = x_train[0].shape

# Construct a convolutional neural network with feature squeezing activated
# For CIFAR10, squeezing the features to 3 bits works well
comp_params = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy']
}
classifier = CNN(im_shape,
                 act='relu',
                 dataset='cifar10',
                 defences='featsqueeze3')
classifier.compile(comp_params)
classifier.fit(x_train,
Code example #22
def custom_train(args):
    # 1. load dataset and model
    (train_images, train_labels), (test_images,
                                   test_labels) = load_dataset(args.data)
    input_shape = train_images[:args.batch_size, :, :, :].shape
    output_size = max(train_labels) + 1
    model = load_model(args.arch,
                       input_shape=input_shape,
                       output_size=output_size)
    model.summary()

    # 2. set tensorboard configs
    logdir = os.path.join(args.logdir, get_current_time())
    train_writer = tf.summary.create_file_writer(os.path.join(logdir, "train"))
    test_writer = tf.summary.create_file_writer(os.path.join(logdir, "test"))

    # 3. loss, optimizer, metrics setting
    optimizer = tf.keras.optimizers.Adam()
    criterion = tf.keras.losses.SparseCategoricalCrossentropy()
    train_loss_avg = tf.keras.metrics.Mean()
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    test_loss_avg = tf.keras.metrics.Mean()
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    # 4. dataset config
    buffer_size = len(train_images)
    train_steps_per_epoch = math.ceil(len(train_images) / args.batch_size)
    if args.steps_per_epoch:
        train_steps_per_epoch = min(args.steps_per_epoch,
                                    train_steps_per_epoch)

    train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    train_ds = train_ds.shuffle(buffer_size)
    if args.augmentation:
        train_ds = train_ds.map(augment)
    train_ds = train_ds.batch(args.batch_size)

    test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    test_ds = test_ds.batch(args.batch_size)

    @tf.function
    def train_step(x, y_true):
        with tf.GradientTape() as tape:
            y_pred = model(x, training=True)
            loss = criterion(y_true, y_pred)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        avg_loss = train_loss_avg(loss)
        avg_acc = train_accuracy(y_true, y_pred)
        return avg_loss, avg_acc

    @tf.function
    def test_step(x, y_true):
        y_pred = model(x, training=False)
        loss = criterion(y_true, y_pred)
        avg_loss = test_loss_avg(loss)
        avg_acc = test_accuracy(y_true, y_pred)
        return avg_loss, avg_acc

    # 5. start train and test
    for epoch in range(args.max_epoch):
        print(f"Epoch {epoch + 1}/{args.max_epoch}")
        # 5.1. initialize metrics and progress bar
        train_loss_avg.reset_states()
        train_accuracy.reset_states()
        test_loss_avg.reset_states()
        test_accuracy.reset_states()

        pbar = tf.keras.utils.Progbar(train_steps_per_epoch)

        # 5.3. train
        for i, (x, y_true) in enumerate(train_ds):
            if i >= train_steps_per_epoch:
                break
            loss, acc = train_step(x, y_true)
            pbar.update(i + 1, [("loss", loss), ("accuracy", acc)],
                        finalize=False)

        # 5.4. test
        for x, y_true in test_ds:
            loss, acc = test_step(x, y_true)
        pbar.update(
            train_steps_per_epoch,
            [("test_loss", loss), ("test_accuracy", acc)],
            finalize=True,
        )

        # 5.5. write metrics to tensorboard
        with train_writer.as_default():
            tf.summary.scalar("Loss", train_loss_avg.result(), step=epoch)
            tf.summary.scalar("Acc", train_accuracy.result(), step=epoch)
        with test_writer.as_default():
            tf.summary.scalar("Loss", test_loss_avg.result(), step=epoch)
            tf.summary.scalar("Acc", test_accuracy.result(), step=epoch)
Code example #23
# Retrieve previous results for classifier
try:
    with open(os.path.join(MODEL_PATH, "accuracies.json"), "r") as json_file:
        results = json.load(json_file)

        results_timestamp = os.path.getmtime(
            os.path.join(MODEL_PATH, "accuracies.json"))
except:
    results = {}
    results_timestamp = 0

already_tested = results.keys()

# Get dataset
(X_train, Y_train), (X_test, Y_test), _, _ = load_dataset(MODEL_PATH)

if "train_accuracy" not in already_tested:
    # Test on true train instances
    scores = classifier.evaluate(X_train, Y_train, verbose=args.verbose)
    v_print("\naccuracy on train: %.2f%%" % (scores[1] * 100))
    results["train_accuracy"] = scores[1] * 100

if "test_accuracy" not in already_tested:
    # Test on true test instances
    scores = classifier.evaluate(X_test, Y_test, verbose=args.verbose)
    v_print("\naccuracy on test: %.2f%%" % (scores[1] * 100))
    results["test_accuracy"] = scores[1] * 100

# Get adversarial examples
ADV_PATH = os.path.join(DATA_PATH, "adversarial", args.dataset)
Code example #24
from config import config_dict

from numpy import append
import tensorflow as tf
import keras.backend as k

from src.attacks.deepfool import DeepFool
from src.classifiers.cnn import CNN
from src.utils import load_dataset

# Get session
session = tf.Session()
k.set_session(session)

# Read CIFAR10 dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('cifar10')
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:500], y_test[:500]
im_shape = x_train[0].shape

# Construct a convolutional neural network
comp_params = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy']
}
classifier = CNN(im_shape, act='relu', dataset='cifar10')
classifier.compile(comp_params)
classifier.fit(x_train,
               y_train,
               validation_split=.1,
Code example #25
File: cnn.py Project: v-zmiycharov/image-recognition
import numpy as np
import lasagne
from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet
import definitions
from src.image_net import IMAGES
import src.utils as utils

if __name__ == '__main__':
    IMAGE_SIZE = 32
    TRAIN_IMAGES_COUNT = 300
    NUM_CLASSES = 5

    X_train, y_train = utils.load_dataset(NUM_CLASSES,
                                          IMAGE_SIZE,
                                          TRAIN_IMAGES_COUNT,
                                          is_reshape=True)

    net1 = NeuralNet(
        layers=[
            ('input', layers.InputLayer),
            ('conv2d1', layers.Conv2DLayer),
            ('maxpool1', layers.MaxPool2DLayer),
            ('conv2d2', layers.Conv2DLayer),
            ('maxpool2', layers.MaxPool2DLayer),
            ('dropout1', layers.DropoutLayer),
            ('dense', layers.DenseLayer),
            ('dropout2', layers.DropoutLayer),
            ('output', layers.DenseLayer),
        ],
        # input layer
Code example #26
from config import config_dict

import tensorflow as tf
import keras.backend as k

from src.attacks.deepfool import DeepFool
from src.classifiers.cnn import CNN
from src.classifiers.resnet import ResNet
from src.utils import load_dataset

# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')
im_shape = x_train[0].shape

# Construct and train a Resnet convolutional neural network
comp_params = {'loss': 'categorical_crossentropy',
               'optimizer': 'adam',
               'metrics': ['accuracy']}
source = ResNet(im_shape, act='relu')
source.compile(comp_params)
source.fit(x_train, y_train, validation_split=.1, epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
epsilon = .1  # Maximum perturbation
adv_crafter = DeepFool(source, sess=session)
x_train_adv = adv_crafter.generate(x_val=x_train, eps=epsilon, clip_min=min_, clip_max=max_)
x_test_adv = adv_crafter.generate(x_val=x_test, eps=epsilon, clip_min=min_, clip_max=max_)