Code Example #1
File: pascalVOC.py  Project: Delaunay/mlbaselines
    def __init__(self, data_path, year='2012', cache=None, **kargs):

        with FileLock('voc.lock', timeout=option('download.lock.timeout', 4 * 60, type=int)):
            train_dataset = torchvision.datasets.VOCSegmentation(
                root=data_path, year=year, image_set='train', download=True,
                transforms=UserTransform())

        with FileLock('voc.lock', timeout=option('download.lock.timeout', 4 * 60, type=int)):
            test_dataset = torchvision.datasets.VOCSegmentation(
                root=data_path, year=year, image_set='val', download=True,
                transforms=UserTransform())

        dataset = DatasetPaddingWrapper(
            torch.utils.data.ConcatDataset([train_dataset, test_dataset]))

        if cache:
            dataset = DatasetCache(dataset, cache)

        if 'train_size' not in kargs:
            kargs['train_size'] = len(train_dataset)
        if 'valid_size' not in kargs:
            kargs['valid_size'] = len(test_dataset)//2
        if 'test_size' not in kargs:
            kargs['test_size'] = len(test_dataset)//2 + len(test_dataset)%2

        super(PascalVOC, self).__init__(
            dataset,
            **kargs,
        )
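
All of these constructors fetch tunable defaults through option(key, default, type=...). As a rough mental model only (not the actual olympus.utils implementation), such a lookup can be pictured as an environment-variable override on a dotted key that falls back to the supplied default, as in the hypothetical sketch below.

import os

def option_sketch(key, default=None, type=str):
    # Hypothetical stand-in for olympus.utils.option: resolve a dotted key
    # such as 'download.lock.timeout' from an environment override
    # (OLYMPUS_DOWNLOAD_LOCK_TIMEOUT) and fall back to the given default.
    raw = os.environ.get('OLYMPUS_' + key.upper().replace('.', '_'))
    if raw is None:
        return default
    return type(raw)

# option_sketch('download.lock.timeout', 4 * 60, type=int) -> 240
# unless OLYMPUS_DOWNLOAD_LOCK_TIMEOUT is set in the environment.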
Code Example #2
    def __init__(self,
                 uri,
                 database,
                 id,
                 experiment=None,
                 hpo_allowed=True,
                 work_allowed=True,
                 log_capture=False):
        super(TrialWorker, self).__init__(uri, database, experiment, id,
                                          WORK_QUEUE, RESULT_QUEUE)
        self.namespaced = experiment is not None
        self.client.capture = log_capture

        if work_allowed:
            self.new_handler(WORK_ITEM, self.run_trial)

        if hpo_allowed:
            self.new_handler(HPO_ITEM, self.run_hpo)

        self.new_handler(WORKER_JOIN, self.ignore_message)

        self.timeout = option('worker.timeout', 5 * 60, type=int)
        self.max_retry = option('worker.max_retry', 3, type=int)
        self.backoff = dict()

        # Do not shut down this worker when a shutdown message is received
        if experiment is None:
            info('Disabling message shutdown because no experiment is set')
            self.dispatcher[SHUTDOWN] = lambda *args, **kwargs: print(
                'ignoring shutdown signal')
Code Example #3
    def __init__(self, data_path):
        transformations = [
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]

        train_transform = [
            to_pil_image,
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor()] + transformations

        transformations = dict(
            train=transforms.Compose(train_transform),
            valid=transforms.Compose(transformations),
            test=transforms.Compose(transformations))

        with FileLock('cifar100.lock', timeout=option('download.lock.timeout', 4 * 60, type=int)):
            train_dataset = datasets.CIFAR100(root=data_path, train=True, download=True, transform=transforms.ToTensor())

        with FileLock('cifar100.lock', timeout=option('download.lock.timeout', 4 * 60, type=int)):
            test_dataset = datasets.CIFAR100(root=data_path, train=False, download=True, transform=transforms.ToTensor())

        super(CIFAR100, self).__init__(
            torch.utils.data.ConcatDataset([train_dataset, test_dataset]),
            test_size=len(test_dataset),
            transforms=transformations
        )
Code Example #4
    def __init__(self,
                 data_path,
                 transform=True,
                 transform_seed=0,
                 cache=None):
        transformations = [
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010))
        ]

        rng = numpy.random.RandomState(transform_seed)

        if transform:
            train_transform = [
                to_pil_image,
                RandomCrop(32, padding=4, seed=rng.randint(2**30)),
                RandomHorizontalFlip(seed=rng.randint(2**30)),
                transforms.ToTensor()
            ] + transformations

        else:
            train_transform = transformations

        transformations = dict(train=Compose(train_transform),
                               valid=Compose(transformations),
                               test=Compose(transformations))

        with FileLock('cifar10.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            train_dataset = datasets.CIFAR10(root=data_path,
                                             train=True,
                                             download=True,
                                             transform=transforms.ToTensor())

        with FileLock('cifar10.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            test_dataset = datasets.CIFAR10(root=data_path,
                                            train=False,
                                            download=True,
                                            transform=transforms.ToTensor())

        super(CIFAR10, self).__init__(torch.utils.data.ConcatDataset(
            [train_dataset, test_dataset]),
                                      test_size=len(test_dataset),
                                      transforms=transformations)
Code Example #5
    def __init__(self,
                 data_path,
                 mini=False,
                 train_size=None,
                 valid_size=None,
                 test_size=None,
                 input_shape=None,
                 target_shape=None,
                 **kwargs):
        transformations = [transforms.Normalize((0.1307, ), (0.3081, ))]

        if mini:
            transformations.insert(0, minimize(7))

        transform = transforms.Compose(transformations)

        with FileLock('mnist.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            train_dataset = datasets.MNIST(data_path,
                                           train=True,
                                           download=True,
                                           transform=transforms.ToTensor())

        with FileLock('mnist.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            test_dataset = datasets.MNIST(data_path,
                                          train=False,
                                          download=True,
                                          transform=transforms.ToTensor())

        if test_size is None:
            test_size = len(test_dataset)

        super(MNIST, self).__init__(torch.utils.data.ConcatDataset(
            [train_dataset, test_dataset]),
                                    test_size=test_size,
                                    train_size=train_size,
                                    valid_size=valid_size,
                                    transforms=transform,
                                    input_shape=input_shape,
                                    target_shape=target_shape)
Code Example #6
    def __init__(self, data_path):
        with FileLock('SVHN.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            train_dataset = datasets.SVHN(data_path,
                                          split='train',
                                          download=True,
                                          transform=transforms.ToTensor())

        with FileLock('SVHN.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            test_dataset = datasets.SVHN(data_path,
                                         split='test',
                                         download=True,
                                         transform=transforms.ToTensor())

        super(SVHN, self).__init__(torch.utils.data.ConcatDataset(
            [train_dataset, test_dataset]),
                                   test_size=len(test_dataset))
Code Example #7
File: main.py  Project: Delaunay/mlbaselines
def dashboard():
    """Dashboard entry point for Olympus user"""
    from msgqueue.backends import new_monitor

    parser = ArgumentParser()
    parser.add_argument(
        '--uri',
        type=str,
        default='mongo://127.0.0.1:27017',
        help='URI pointing to the resource to connect to\n'
        'Examples:\n'
        '   - mongodb instance: mongo://127.0.0.1:27017\n'
        '   - cockroach db instance: cockroach://0.0.0.0:8123\n'
        '   - local archive: zip:/home/setepenre/work/olympus/data.zip\n')
    parser.add_argument('--database',
                        type=str,
                        default='olympus',
                        help='Name of the database')
    parser.add_argument('--storage',
                        type=str,
                        default=option('state.storage',
                                       '/home/setepenre/zshare/tmp'),
                        help='FS path to storage')
    args = parser.parse_args()

    dash = Dashboard()
    client = new_monitor(args.uri, args.database)

    state_storage = StateStorage(folder=args.storage)

    navbar = html.navbar(
        Experiments='/experiment',
        Studies='/study',
        States='/state/browse',
        Debug='/',
    )

    dash.add_page(MainPage(dash), header=navbar)
    dash.add_page(StatusQueue(client), header=navbar)
    dash.add_page(InspectQueue(client), header=navbar)
    dash.add_page(ResultQueue(client), header=navbar)
    dash.add_page(GanttQueue(client), header=navbar)
    dash.add_page(SpaceQueue(client), header=navbar)
    dash.add_page(FANVOAQueue(client), header=navbar)
    dash.add_page(LogsQueue(client), header=navbar)
    dash.add_page(MetricQueue(client), header=navbar)
    dash.add_page(ExperimentOverview(client), header=navbar)
    dash.add_page(StudyOverview(client), header=navbar)
    dash.add_page(StateBrowser(state_storage), header=navbar)
    dash.add_page(InspectModel(state_storage), header=navbar)

    return dash
Code Example #8
    def __init__(self, data_path):
        with FileLock('FashionMNIST.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            train_dataset = datasets.FashionMNIST(
                data_path,
                train=True,
                download=True,
                transform=transforms.ToTensor())

        with FileLock('FashionMNIST.lock',
                      timeout=option('download.lock.timeout', 4 * 60,
                                     type=int)):
            test_dataset = datasets.FashionMNIST(
                data_path,
                train=False,
                download=True,
                transform=transforms.ToTensor())

        super(FashionMNIST, self).__init__(torch.utils.data.ConcatDataset(
            [train_dataset, test_dataset]),
                                           test_size=len(test_dataset))
Code Example #9
def build_dataset(data_path, timeout=10 * 60):
    if all_hdf5_exists(data_path):
        return

    try:
        with FileLock(os.path.join(data_path, DIRNAME + ".lock"), timeout=option('download.lock.timeout', timeout, type=int)):
            download(data_path)
            unzip(data_path)
            create_hdf5(data_path)
    except Timeout:
        print("Another process holds the lock since more than {} seconds. "
              "Will try to load the dataset.").format(timeout)
    finally:
        clean(data_path)
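
The FileLock / Timeout pair used in these download guards matches the API of the filelock package: one process acquires a named lock file and performs the download, while a Timeout in another process simply means someone else is already preparing the data. A minimal standalone sketch of that pattern, with fetch_data as a placeholder callable and a 4-minute timeout assumed:

from filelock import FileLock, Timeout

def fetch_once(data_path, fetch_data, timeout=4 * 60):
    # Guard fetch_data(data_path) with a file lock so concurrent workers
    # do not download the same dataset twice. fetch_data is a placeholder.
    try:
        with FileLock(data_path + '.lock', timeout=timeout):
            fetch_data(data_path)
    except Timeout:
        # Another process holds the lock; assume it is preparing the data.
        print('Lock held elsewhere for more than {} seconds, '
              'assuming the dataset is being prepared.'.format(timeout))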
Code Example #10
    def __init__(self,
                 name=None,
                 dataset=None,
                 path=option('data.path', default='/tmp/olympus/data'),
                 **kwargs):
        if dataset is not None:
            self.dataset = dataset

        elif name is not None:
            dataset_ctor = registered_datasets.get(name)

            if dataset_ctor is None:
                raise RegisteredDatasetNotFound(name)

            self.dataset = dataset_ctor(data_path=path, **kwargs)

        else:
            raise MissingArgument('Dataset or Name need to be set')
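
Code Example #10 resolves a dataset by name through registered_datasets, a mapping from names to constructors. The lookup itself is just dictionary access plus an error for unknown names; the toy sketch below shows the same pattern with made-up names and is not the olympus registry.

# Toy name-to-constructor registry illustrating the lookup in Code Example #10.
registered_datasets_sketch = {}

def register_dataset(name):
    def decorator(ctor):
        registered_datasets_sketch[name] = ctor
        return ctor
    return decorator

@register_dataset('toy_dataset')
class ToyDataset:
    def __init__(self, data_path, **kwargs):
        self.data_path = data_path

ctor = registered_datasets_sketch.get('toy_dataset')
if ctor is None:
    raise KeyError('toy_dataset is not registered')
dataset = ctor(data_path='/tmp/olympus/data')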
Code Example #11
File: bert.py  Project: Delaunay/mlbaselines
    def __call__(self, input_size, output_size, attention_probs_dropout_prob,
                 hidden_dropout_prob):

        cache_dir = option('model.cache', '/tmp/olympus/cache')
        info('model cache folder: {}'.format(cache_dir))

        config = BertConfig.from_pretrained('bert-base-uncased',
                                            num_labels=2,
                                            finetuning_task=self.task,
                                            cache_dir=cache_dir)

        config.attention_probs_dropout_prob = attention_probs_dropout_prob
        config.hidden_dropout_prob = hidden_dropout_prob

        model = BertWrapper.from_pretrained('bert-base-uncased',
                                            from_tf=False,
                                            config=config,
                                            cache_dir=cache_dir)

        return model
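
Code Example #11 overrides the dropout probabilities on a pretrained configuration before building the project's BertWrapper. The same override pattern works with a stock Hugging Face head; the sketch below uses BertForSequenceClassification in place of BertWrapper, with placeholder dropout values.

from transformers import BertConfig, BertForSequenceClassification

config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)
config.attention_probs_dropout_prob = 0.2  # placeholder value
config.hidden_dropout_prob = 0.2           # placeholder value

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', config=config)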
Code Example #12
    def __init__(self,
                 data_path,
                 transforms=None,
                 target_transforms=None,
                 download=True):
        self.root = data_path

        if download:
            with FileLock('penndufan.lock',
                          timeout=option('download.lock.timeout',
                                         4 * 60,
                                         type=int)):
                self.download()

        self.transforms = transforms
        self.target_transforms = target_transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(self.images_path)))
        self.masks = list(sorted(os.listdir(self.masks_path)))
Code Example #13
    def __init__(self,
                 hpo,
                 rank,
                 uri,
                 experiment,
                 database=option('olympus.database', 'olympus')):
        self.hpo = hpo
        self.experiment = experiment
        self.client = new_client(uri, database)
        self.current_message = None

        # check that HPO is not finished
        state = self._fetch_final_state()
        if state is not None:
            raise ExperimentFinished(
                f'Experiment `{experiment}` is finished, change the experiment name'
            )

        # first worker queue HPO
        if rank == 0:
            self._queue_hpo()

        # broadcast that one worker is joining
        self.client.push(RESULT_QUEUE, self.experiment, {}, mtype=WORKER_JOIN)
Code Example #14
from argparse import ArgumentParser, RawDescriptionHelpFormatter

import torch

from olympus.datasets import SplitDataset, Dataset, DataLoader
from olympus.datasets.decorator.window import WindowedDataset
from olympus.metrics import Loss
from olympus.models import Model

from olympus.optimizers import Optimizer, known_optimizers
from olympus.tasks.finance import Finance, SharpeRatioCriterion
from olympus.utils import option, fetch_device, show_hyperparameter_space, parse_args
from olympus.observers import metric_logger, CheckPointer

from olympus.utils.storage import StateStorage

DEFAULT_EXP_NAME = 'finance'
base = option('base_path', '/media/setepenre/local/')


def arguments():
    parser = ArgumentParser(prog='finance',
                            description='Finance Baseline',
                            epilog=show_hyperparameter_space(),
                            formatter_class=RawDescriptionHelpFormatter)

    parser.add_argument('--uri',
                        type=str,
                        default=None,
                        help='Resource to use to store metrics')

    parser.add_argument('--database',
                        type=str,
Code Example #15
File: minimalist.py  Project: Delaunay/mlbaselines
import torch.nn.functional as F
from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.optimizers import Optimizer, LRSchedule

from olympus.models import Model
from olympus.observers import ObserverList, ProgressView, Speed
from olympus.utils import fetch_device, option

epochs = 2
device = fetch_device()
base = option('base_path', '/tmp/olympus')

# Model
model = Model('resnet18', input_size=(1, 28, 28), output_size=(10, ))

# Optimizer
optimizer = Optimizer('sgd',
                      params=model.parameters(),
                      weight_decay=0.001,
                      lr=1e-5,
                      momentum=1e-5)

# Schedule
lr_schedule = LRSchedule('exponential', optimizer=optimizer, gamma=0.99)

data = Dataset('fake_mnist', path=f'{base}/data')

splits = SplitDataset(data, split_method='original')

# Dataloader
loader = DataLoader(splits, sampler_seed=1, batch_size=32)
Code Example #16
File: dataset.py  Project: Delaunay/mlbaselines
from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.utils import option, new_seed, get_seeds
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('dataset', type=str, help='name of the dataset to load')
args = parser.parse_args()

# can be customized using OLYMPUS_BASE_PATH or Olympus configuration file if found
base = option('data.path', '/tmp/olympus')

# get the dataset
dataset = Dataset(args.dataset, path=f'{base}/data')

# How to split the dataset
splits = SplitDataset(dataset, split_method='original')

# DataLoader builder
loader = DataLoader(splits, sampler_seed=new_seed(sampler=1), batch_size=32)

# Train my model
for step, batch in enumerate(loader.train()):
    print('\rTrain:', step, len(batch), end='')
print()

# Using a bigger batch size when gradient is not computed
for step, batch in enumerate(loader.valid(batch_size=1024)):
    print('\rValid:', step, len(batch), end='')
print()

for step, batch in enumerate(loader.test(batch_size=1024)):
    print('\rTest:', step, len(batch), end='')
print()
Code Example #17
def main():
    from sspace.space import compute_identity

    args = arguments()
    tickers = [
        # 1     2      3     4      5       6     7     8      9    10
        'MO',
        'AEP',
        'BA',
        'BMY',
        'CPB',
        'CAT',
        'CVX',
        'KO',
        'CL',
        'COP',  # 1
        'ED',
        'CVS',
        'DHI',
        'DHR',
        'DRI',
        'DE',
        'D',
        'DTE',
        'ETN',
        'EBAY',  # 2
        'F',
        'BEN',
        'HSY',
        'HBAN',
        'IBM',
        'K',
        'GIS',
        'MSI',
        'NSC',
        'TXN'
    ]
    start, end = '2000-01-01', '2019-05-10'

    device = fetch_device()

    task = finance_baseline(tickers, start, end, args.optimizer,
                            args.batch_size, device, args.window)

    lr = 1e-8
    uid = compute_identity(
        dict(tickers=tickers,
             start=start,
             end=end,
             window=args.window,
             lr=lr,
             epochs=args.epochs), 16)

    if args.uri is not None:
        logger = metric_logger(args.uri, args.database,
                               f'{DEFAULT_EXP_NAME}_{uid}')
        task.metrics.append(logger)

    if args.storage is not None:
        storage = StateStorage(
            folder=option('state.storage', '/home/setepenre/zshare/tmp'))
        task.metrics.append(
            CheckPointer(storage=storage,
                         time_buffer=5,
                         keep_best='validation_loss',
                         save_init=True))

    optimizer = task.optimizer.defaults
    optimizer['lr'] = lr

    task.init(optimizer=optimizer, uid=uid)
    task.fit(args.epochs)

    stats = task.metrics.value()
    print(stats)
    return float(stats['validation_loss'])