Example #1
0
    def _setup_app_singleton(self):
        """Lazily build the Flask app and its Api wrapper (singleton-style).

        If either handle is missing, both are (re)created so the pair
        always stays in sync.
        """
        if self._app and self._api:
            return
        self._app = Flask(CONFIG['APP_NAME'])
        self._app.logger = create_logger(slack=True)
        self._app.config.from_object('src.config.ChatterConfig')
        self._api = Api(self._app)
Example #2
0
def main():
    """Validate the config, preprocess the raw data, and write TFRecords."""
    log = utils.create_logger()

    # Bail out early when the configuration fails validation.
    if not validate.config_validate():
        return

    log.info('Load and preprocess data...')
    row_limit = int(config.INI['DIRECTORY']['num_rows'])
    df = utils.preprocess_df(filename=config.INI['DIRECTORY']['filename'],
                             features=config.all_features,
                             num_rows=row_limit)

    log.info('Checking features data type if same as expected...')
    for feature_group, expected_type in ((config.features_dtype_int, int),
                                         (config.features_dtype_list, list)):
        utils.check_dtype(df, feature_group, expected_type)

    log.info('Writing to TFRecords...')
    utils.write_to_tfrecords(df, config.all_features)

    log.info('Finished all')
Example #3
0
def main(args):
    """Train or evaluate a binary classifier according to ``args.todo``.

    ``args.model`` selects the backbone ('enet' or 'xception');
    ``args.array`` switches between .npy-array datasets and image-folder
    datasets. Any other ``args.todo`` raises NotImplementedError.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    makedirs(args.log_root)
    makedirs(args.model_folder)

    # Expose the folders under the attribute names the Trainer expects.
    setattr(args, 'log_folder', args.log_root)
    setattr(args, 'model_folder', args.model_folder)

    logger = create_logger(args.log_root, args.todo, 'info')

    print_args(args, logger)
    if args.model == 'enet':
        model = EfficientNet.from_pretrained('efficientnet-b5', num_classes=2)
    elif args.model == 'xception':
        model, *_ = model_selection(modelname='xception',
                                    num_out_classes=2,
                                    init_checkpoint=args.init_load)
    else:
        raise NotImplementedError

    if args.load_checkpoint is not None:
        # Map GPU-saved weights onto the CPU when no CUDA device is present.
        if device.type == 'cpu':
            checkpoint = torch.load(args.load_checkpoint,
                                    map_location=torch.device('cpu'))
        else:
            checkpoint = torch.load(args.load_checkpoint)
        model.load_state_dict(checkpoint)

    if torch.cuda.device_count() > 1:
        print('GPUs: ', torch.cuda.device_count())
        model = nn.DataParallel(model)

    model = model.to(device)

    trainer = Trainer(args, logger)

    if args.todo == 'train':
        if args.array:
            transform = transforms.Normalize(mean=[0.5], std=[0.5])

            def npy_loader(path):
                # Samples are stored as raw numpy arrays on disk.
                sample = torch.from_numpy(np.load(path))
                return sample

            #BUILD TRAIN SET
            print("Initializing Dataset...")
            train_set = DatasetFolderWithPaths(root=args.data_root,
                                               loader=npy_loader,
                                               extensions='.npy',
                                               transform=transform)
            print("Dataset is successful")

            #BUILD VAL SET
            val_set = DatasetFolderWithPaths(root=args.val_root,
                                             loader=npy_loader,
                                             extensions='.npy',
                                             transform=transform)
        else:
            transform = transforms.Compose([
                transforms.Resize((299, 299)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            ])
            train_set = ImageFolderWithPaths(args.data_root,
                                             transform=transform)
            val_set = ImageFolderWithPaths(args.val_root, transform=transform)
        logger.info('Train Total: %d' % len(train_set))
        logger.info('Val Total: %d' % len(val_set))

        tr_loader = DataLoader(train_set,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=args.nworkers,
                               pin_memory=torch.cuda.is_available())
        te_loader = DataLoader(val_set,
                               batch_size=args.batch_size,
                               shuffle=False,
                               num_workers=args.nworkers,
                               pin_memory=torch.cuda.is_available())

        # Bug fix: the array and image paths used to duplicate this exact
        # call in an if/else; a single call is equivalent.
        trainer.train(model,
                      tr_loader,
                      te_loader,
                      device,
                      adv_train=args.adv)

    elif args.todo == 'test':
        if args.array:
            transform = transforms.Normalize(mean=[0.5], std=[0.5])

            def npy_loader(path):
                sample = torch.from_numpy(np.load(path))
                return sample

            te_dataset = DatasetFolderWithPaths(root=args.data_root,
                                                loader=npy_loader,
                                                extensions='.npy',
                                                transform=transform)
        else:
            transform = transforms.Compose([
                transforms.Resize((299, 299)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            ])
            te_dataset = ImageFolderWithPaths(args.data_root,
                                              transform=transform)

        te_loader = DataLoader(te_dataset,
                               batch_size=args.batch_size,
                               shuffle=False,
                               num_workers=args.nworkers,
                               pin_memory=torch.cuda.is_available())

        # Bug fix: same duplicated-branch collapse as in the train path.
        std_acc, loss = trainer.test(model,
                                     te_loader,
                                     device,
                                     adv_test=args.adv)
        print("std acc: {:4f}".format(std_acc * 100))
    else:
        raise NotImplementedError
Example #4
0
import os

import dill
import theano
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn import metrics
from neupy import algorithms, layers, environment

from src.word_embedding_nn import WordEmbeddingNN
from src.preprocessing import TokenizeText, IgnoreUnknownWords
from src.utils import (WORD_EMBEDDING_NN, NN_CLASSIFIER_MODEL, REVIEWS_FILE,
                       create_logger)

logger = create_logger(__name__)

# Fix the RNG seeds and use 32-bit floats throughout theano.
environment.reproducible()
theano.config.floatX = 'float32'

if not os.path.exists(WORD_EMBEDDING_NN):
    # Bug fix: the original message contained bare ``{}`` placeholders that
    # were never formatted, so the missing path never appeared in the error,
    # and the adjacent string literals joined without a space.
    raise EnvironmentError(
        "Can't find NN model. File {} doesn't exist. "
        "Probably you haven't train it yet. "
        "Run `train_word_embedding_nn.py` script.".format(WORD_EMBEDDING_NN))

logger.info("Reading data")
data = pd.read_csv(REVIEWS_FILE, sep='\t')

logger.info("Loading word embedding NN")
word2vec = WordEmbeddingNN.load(WORD_EMBEDDING_NN)

prepare_data_pipeline = Pipeline([
Example #5
0
import os

import dill
import theano
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn import metrics
from neupy import algorithms, layers, environment

from src.word_embedding_nn import WordEmbeddingNN
from src.preprocessing import TokenizeText, IgnoreUnknownWords
from src.utils import (WORD_EMBEDDING_NN, NN_CLASSIFIER_MODEL, REVIEWS_FILE,
                       create_logger)


logger = create_logger(__name__)

# Fix the RNG seeds and use 32-bit floats throughout theano.
environment.reproducible()
theano.config.floatX = 'float32'

if not os.path.exists(WORD_EMBEDDING_NN):
    # Bug fix: the original message contained bare ``{}`` placeholders that
    # were never formatted, so the missing path never appeared in the error,
    # and the adjacent string literals joined without a space.
    raise EnvironmentError(
        "Can't find NN model. File {} doesn't exist. "
        "Probably you haven't train it yet. "
        "Run `train_word_embedding_nn.py` script.".format(WORD_EMBEDDING_NN))

logger.info("Reading data")
data = pd.read_csv(REVIEWS_FILE, sep='\t')

logger.info("Loading word embedding NN")
word2vec = WordEmbeddingNN.load(WORD_EMBEDDING_NN)
Example #6
0
def main(args):
    """Adversarially train or evaluate a WideResNet-34-10 on CIFAR-10.

    ``args.todo`` selects the mode ('train' or 'test'); anything else
    raises NotImplementedError.
    """
    save_folder = '%s_%s' % (args.dataset, args.affix)

    log_folder = os.path.join(args.log_root, save_folder)
    model_folder = os.path.join(args.model_root, save_folder)
    makedirs(log_folder)
    makedirs(model_folder)

    # The Trainer reads these two attributes off ``args``.
    args.log_folder = log_folder
    args.model_folder = model_folder

    logger = create_logger(log_folder, args.todo, 'info')
    print_args(args, logger)

    model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0)

    attack = FastGradientSignUntargeted(model,
                                        args.epsilon,
                                        args.alpha,
                                        min_val=0,
                                        max_val=1,
                                        max_iters=args.k,
                                        _type=args.perturbation_type)

    if torch.cuda.is_available():
        model.cuda()

    trainer = Trainer(args, logger, attack)

    if args.todo == 'train':
        augment = tv.transforms.Compose([
            tv.transforms.RandomCrop(32,
                                     padding=4,
                                     fill=0,
                                     padding_mode='constant'),
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.ToTensor(),
        ])
        train_data = tv.datasets.CIFAR10(args.data_root,
                                         train=True,
                                         transform=augment,
                                         download=True)
        train_loader = DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4)

        # Held-out set evaluated during training (no augmentation).
        eval_data = tv.datasets.CIFAR10(args.data_root,
                                        train=False,
                                        transform=tv.transforms.ToTensor(),
                                        download=True)
        eval_loader = DataLoader(eval_data,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4)

        trainer.train(model, train_loader, eval_loader, args.adv_train)
    elif args.todo == 'test':
        eval_data = tv.datasets.CIFAR10(args.data_root,
                                        train=False,
                                        transform=tv.transforms.ToTensor(),
                                        download=True)
        eval_loader = DataLoader(eval_data,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4)

        model.load_state_dict(torch.load(args.load_checkpoint))

        std_acc, adv_acc = trainer.test(model,
                                        eval_loader,
                                        adv_test=True,
                                        use_pseudo_label=False)
        print(f"std acc: {std_acc * 100:.3f}%, adv_acc: {adv_acc * 100:.3f}%")
    else:
        raise NotImplementedError
def main(args):
    """Adversarially train or evaluate an ImageNet-pretrained ResNet-50
    (head replaced with a 10-way classifier) on CIFAR-10, per ``args.todo``.
    """

    save_folder = '%s_%s' % (args.dataset, args.affix)

    log_folder = os.path.join(args.log_root, save_folder)
    model_folder = os.path.join(args.model_root, save_folder)

    makedirs(log_folder)
    makedirs(model_folder)

    # Expose the folders under the attribute names the Trainer expects.
    setattr(args, 'log_folder', log_folder)
    setattr(args, 'model_folder', model_folder)

    logger = create_logger(log_folder, args.todo, 'info')

    print_args(args, logger)

    # model = WideResNet(depth=34, num_classes=10, widen_factor=10, dropRate=0.0)
    model = models.resnet50(pretrained=True)
    num_classes = 10
    # Replace the 1000-way ImageNet head with a 10-class linear layer.
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    attack = FastGradientSignUntargeted(model,
                                        args.epsilon,
                                        args.alpha,
                                        min_val=0,
                                        max_val=1,
                                        max_iters=args.k,
                                        _type=args.perturbation_type)

    if torch.cuda.is_available():
        model.cuda()

    trainer = Trainer(args, logger, attack)

    if args.todo == 'train':
        # Zero-pad each image border by 4 px (via F.pad on a temporary batch
        # dim), then randomly crop back to 32x32 and flip — hand-rolled
        # augmentation equivalent in intent to RandomCrop(32, padding=4).
        transform_train = tv.transforms.Compose([
            tv.transforms.ToTensor(),
            tv.transforms.Lambda(lambda x: F.pad(
                x.unsqueeze(0),
                (4, 4, 4, 4), mode='constant', value=0).squeeze()),
            tv.transforms.ToPILImage(),
            tv.transforms.RandomCrop(32),
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.ToTensor(),
        ])
        tr_dataset = tv.datasets.CIFAR10(args.data_root,
                                         train=True,
                                         transform=transform_train,
                                         download=True)

        # NOTE(review): 48 loader workers assumes a large host — confirm.
        tr_loader = DataLoader(tr_dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=48)

        # evaluation during training
        te_dataset = tv.datasets.CIFAR10(args.data_root,
                                         train=False,
                                         transform=tv.transforms.ToTensor(),
                                         download=True)

        te_loader = DataLoader(te_dataset,
                               batch_size=args.batch_size,
                               shuffle=False,
                               num_workers=48)

        trainer.train(model, tr_loader, te_loader, args.adv_train)
    elif args.todo == 'test':
        te_dataset = tv.datasets.CIFAR10(args.data_root,
                                         train=False,
                                         transform=tv.transforms.ToTensor(),
                                         download=True)

        te_loader = DataLoader(te_dataset,
                               batch_size=args.batch_size,
                               shuffle=False,
                               num_workers=48)

        checkpoint = torch.load(args.load_checkpoint)
        model.load_state_dict(checkpoint)

        std_acc, adv_acc = trainer.test(model,
                                        te_loader,
                                        adv_test=True,
                                        use_pseudo_label=False)

        print("std acc: %.4f, adv_acc: %.4f" % (std_acc * 100, adv_acc * 100))

    else:
        raise NotImplementedError
Example #8
0
import tensorflow as tf
import timeit
import numpy as np
import os

from src.data_generator import DataPipeline
from src.config import data_pipeline_hps, dir_hps, train_hps
import src.utils as utils

log = utils.create_logger()

tf.reset_default_graph()

# Resolve the model/load/builder output directories.
start_path = dir_hps.save_dir
final_path = os.path.join(start_path, dir_hps.load_dir)
builder_save_path = os.path.join(start_path, dir_hps.builder_save_dir)
# exist_ok=True replaces the racy isdir-then-makedirs check of the original
# (the directory could appear between the test and the call).
os.makedirs(final_path, exist_ok=True)
os.makedirs(builder_save_path, exist_ok=True)

# Start session
sess_config = tf.ConfigProto(device_count={"CPU": data_pipeline_hps.num_cores},
                             inter_op_parallelism_threads=24,
                             intra_op_parallelism_threads=24)
if train_hps.has_gpu == 1:
    log.info('Controlling the use of GPU')
    # Grow GPU memory on demand instead of grabbing it all up front.
    sess_config.gpu_options.allow_growth = True
sess = tf.Session(config=sess_config)
sess.run(tf.global_variables_initializer())

# Construct data loader