def _init_ctable(self, path):
        """
        Create empty ctable for given path.
        Obtain 、Create 、Append、Attr empty ctable for given path.
        addcol(newcol[, name, pos, move])	Add a new newcol object as column.
        append(cols)	Append cols to this ctable -- e.g. : ctable
        Flush data in internal buffers to disk:
        This call should typically be done after performing modifications
        (__settitem__(), append()) in persistence mode. If you don’t do this,
        you risk losing part of your modifications.

        Parameters
        ----------
        path : string
            The path to rootdir of the new ctable.
        """
        bcolz_dir = os.path.dirname(path)
        print('bcolz_dir', bcolz_dir)
        if not os.path.exists(bcolz_dir):
            os.makedirs(bcolz_dir)
            print('path', path)
        initial_array = np.empty(0, np.uint32)
        # Configure bcolz
        bcolz.set_nthreads(Num * bcolz.detect_number_of_cores())
        # Print all the versions of packages that bcolz relies on.
        bcolz.print_versions()
        """
        clevel : int (0 <= clevel < 10) The compression level.
        shuffle : int The shuffle filter to be activated. Allowed values are bcolz.NOSHUFFLE (0), 
                bcolz.SHUFFLE (1) and bcolz.BITSHUFFLE (2). The default is bcolz.SHUFFLE.
        cname : string (‘blosclz’, ‘lz4’, ‘lz4hc’, ‘snappy’, ‘zlib’, ‘zstd’)
                Select the compressor to use inside Blosc.
        quantize : int (number of significant digits)
                Quantize data to improve (lossy) compression. Data is quantized using np.around(scale*data)/scale,
                 where scale is 2**bits, and bits is determined from the quantize value. For example,
                  if quantize=1, bits will be 4. 0 means that the quantization is disabled.
        default : cparams(clevel=5, shuffle=1, cname='lz4', quantize=0)
        """
        params = bcolz.cparams(clevel=9)
        table = bcolz.ctable(
            rootdir=path,
            columns=[
                initial_array,
                initial_array,
                initial_array,
                initial_array,
                initial_array,
                initial_array,
                initial_array,
            ],
            names=self._bcolz_fields,
            mode='w',
            cparams=params
        )
        print('cparams', table.cparams)
        table.flush()
        table = self._init_attr(table, path)
        # table.attrs['metadata'] = self._init_metadata(path)
        return table
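
A minimal sketch (not from the source) of the create/append/flush cycle described in the docstring above, assuming bcolz is installed; the root directory and the two column names are illustrative:

import numpy as np
import bcolz

rootdir = '/tmp/demo_ctable'        # hypothetical on-disk location
empty = np.empty(0, np.uint32)

# Create an empty persistent ctable with heavy compression, as above.
params = bcolz.cparams(clevel=9)
table = bcolz.ctable(columns=[empty, empty], names=['ts', 'value'],
                     rootdir=rootdir, mode='w', cparams=params)

# Append rows column-wise, then flush so the data survives in persistence mode.
table.append([np.arange(10, dtype=np.uint32),
              np.arange(10, dtype=np.uint32) * 2])
table.flush()

# Reopen read-only to confirm the appended rows were persisted.
print(len(bcolz.open(rootdir, mode='r')))   # -> 10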
Example #2
def cli(ctx):
    import socket
    ctx.obj['HOSTNAME'] = socket.gethostname()

    import bcolz
    bcolz.set_nthreads(1)
    # logging.basicConfig(stream=sys.stdout, level='DEBUG' if debug else 'INFO')
    # logger.error('Debug mode is %s' % ('on' if debug else 'off'))
    pd.set_option('display.max_rows', 100000000)
    pd.set_option('display.max_columns', 100000000)
    pd.set_option('display.width', 100000000)
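
The snippets above pin bcolz to a fixed thread count. As a small aside (an assumption for illustration, not taken from any of the snippets), the pool can also be sized from the detected core count; set_nthreads() returns the previous setting:

import bcolz

# Use one Blosc thread per detected core; keep the old value to restore later.
cores = bcolz.detect_number_of_cores()
previous = bcolz.set_nthreads(cores)
print('blosc threads: %d -> %d' % (previous, cores))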
Example #3
    def __init__(self, transformer, gen_prefix, do_augment_data=False):
        self.transformer = transformer
        self.gen_prefix = gen_prefix
        self.do_augment_data = do_augment_data

        man = get_manager()
        self.data_path = man.samples_path(self.transformer.game, gen_prefix)
        self.summary_path = os.path.join(self.data_path,
                                         "gendata_summary.json")

        self.summary = self.get_summary()
        self.save_summary_file()
        bcolz.set_nthreads(4)
Example #4
    def __init__(self, transformer, gen_prefix, do_augment_data=False,
                 data_augment_pct=1.0,
                 score_draw_as_random_hack=False):

        self.transformer = transformer
        self.gen_prefix = gen_prefix
        self.do_augment_data = do_augment_data
        self.data_augment_pct = data_augment_pct
        self.score_draw_as_random_hack = score_draw_as_random_hack

        man = get_manager()
        self.data_path = man.samples_path(self.transformer.game, gen_prefix)
        self.summary_path = os.path.join(self.data_path, "gendata_summary.json")

        self.summary = self.get_summary()
        self.save_summary_file()
        bcolz.set_nthreads(4)
Example #5
import os
import sys
import bcolz
import numpy as np
from glob import glob
from moviepy.editor import VideoFileClip

sys.path.append(".")
import paths
from utils import rgb2gray
from utils.plt import show_animate

bcolz.set_nthreads(4)
def main():
    dst_dir = paths.CON_PREP
    if not os.path.exists(dst_dir): os.mkdir(dst_dir)

    dump(dst_dir+"train/", glob(paths.CON_VID_TRAIN))
    dump(dst_dir+"valid/", glob(paths.CON_VID_VALID))
    dump(dst_dir+"test/", glob(paths.CON_VID_TEST))

def dump(dst_dir, vid_paths):
    if not os.path.exists(dst_dir): os.mkdir(dst_dir)

    vid_paths.sort()
    print dst_dir, len(vid_paths)
    vid_paths = [p for p in vid_paths if p[-5]=="K"]
    # print len(vid_paths)

    for i, path in enumerate(vid_paths):
        class_dir = dst_dir + path.split("/")[-2] + "/"
Example #6
from ssl import SSLError

import bqueryd
from bqueryd.messages import msg_factory, WorkerRegisterMessage, ErrorMessage, BusyMessage, StopMessage, \
    DoneMessage, TicketDoneMessage
from bqueryd.tool import rm_file_or_dir

DATA_FILE_EXTENSION = '.bcolz'
DATA_SHARD_FILE_EXTENSION = '.bcolzs'
# Timeout in ms: how long to wait on the network poll; this also affects how often new controllers and data files are noticed
POLLING_TIMEOUT = 5000
# how often in seconds to send a WorkerRegisterMessage
WRM_DELAY = 20
MAX_MEMORY_KB = 2 * (2 ** 20)  # Max memory of 2GB, in Kilobytes
DOWNLOAD_DELAY = 5  # how often in seconds to check for downloads
bcolz.set_nthreads(1)


class WorkerBase(object):
    def __init__(self, data_dir=bqueryd.DEFAULT_DATA_DIR, redis_url='redis://127.0.0.1:6379/0', loglevel=logging.DEBUG,
                 restart_check=True, azure_conn_string=None):
        if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
            raise Exception("Datadir %s is not a valid directory" % data_dir)
        self.worker_id = binascii.hexlify(os.urandom(8))
        self.node_name = socket.gethostname()
        self.data_dir = data_dir
        self.data_files = set()
        self.restart_check = restart_check
        context = zmq.Context()
        self.socket = context.socket(zmq.ROUTER)
        self.socket.setsockopt(zmq.LINGER, 500)
Example #7
    def prepare(self):
        """
        Prepare the dataloader, by storing values to static fields of this class
        In this case, only filenames are loaded prematurely
        :return:
        """

        bcolz.set_nthreads(2)

        # step 0: load only when not loaded yet
        if TRAINING in self.data and VALIDATION in self.data: return

        # step 1: load the file names
        patients = sorted(glob.glob(self.location + '/*/'))

        print len(patients), "patients"
        # sys.exit()

        labels = dict()
        with open(paths.LABELS_PATH, 'rb') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            next(reader)  # skip the header
            for row in reader:
                labels[str(row[0])] = int(row[1])

        # make a stratified validation set
        # note, the seed decides the validation set, but it is deterministic in the file_names and labels
        random.seed(317070)
        ids_per_label = [[
            patient_id for patient_id, label in labels.iteritems()
            if label == l
        ] for l in [0, 1]]
        validation_patients = sum([
            random.sample(sorted(ids), int(VALIDATION_SET_SIZE * len(ids)))
            for ids in ids_per_label
        ], [])

        if self.use_luna:
            luna_labels = load_luna_labels(patients)
            print len(luna_labels), "luna labels added"
            labels.update(luna_labels)

        # make the static data empty
        for s in self.datasets:
            self.data[s] = []
            self.labels[s] = []
            self.names[s] = []
            self.spacings[s] = []

        with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'spacings.pkl.gz') as f:
            spacings = cPickle.load(f)

        # load the filenames and put into the right dataset
        for i, patient_folder in enumerate(patients):
            patient_id = str(patient_folder.split(path.sep)[-2])
            if patient_id in labels:
                if patient_id in validation_patients:
                    dataset = VALIDATION
                else:
                    dataset = TRAIN
            else:
                dataset = TEST

            self.data[dataset].append(patient_folder)
            if patient_id in labels:
                self.labels[dataset].append(labels[patient_id])
            self.names[dataset].append(patient_id)
            self.spacings[dataset].append(spacings[patient_id])

        # give every patient a unique number
        last_index = -1
        for set in self.datasets:
            self.indices[set] = range(last_index + 1,
                                      last_index + 1 + len(self.data[set]))
            if len(self.indices[set]) > 0:
                last_index = self.indices[set][-1]
            print set, len(self.indices[set]), "samples"
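
The stratified, seeded validation split built in prepare() above can be reproduced in isolation; a minimal sketch (the seed and variable names come from the snippet; the 0.2 fraction and the label dictionary are made up):

import random

VALIDATION_SET_SIZE = 0.2                          # assumed fraction sampled per label
labels = {'p%02d' % i: i % 2 for i in range(10)}   # toy patient_id -> label map

random.seed(317070)                                # fixed seed: deterministic split
ids_per_label = [[pid for pid, lab in labels.items() if lab == l]
                 for l in (0, 1)]
validation_patients = sum([random.sample(sorted(ids),
                                         int(VALIDATION_SET_SIZE * len(ids)))
                           for ids in ids_per_label], [])
print(sorted(validation_patients))                 # one id per label, same every run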
Example #8
:Author: `Aymeric Rateau <https://github.com/ratal/mdfreader>`__

Dependencies
-------------------
- Python >2.6, >3.2 <http://www.python.org>
- Numpy >1.6 <http://numpy.scipy.org>

mdf module
--------------------------
"""
try:
    CompressionPossible = True
    from bcolz import cparams, carray, detect_number_of_cores, set_nthreads
    _ncores = detect_number_of_cores()
    set_nthreads(_ncores)
    from blosc import decompress_ptr, compress_ptr
except ImportError:
    # Cannot compress data, please install bcolz and blosc
    CompressionPossible = False

from pandas import set_option
from collections import OrderedDict, defaultdict
from numpy import array_repr, set_printoptions, recarray, empty
set_printoptions(threshold=100, edgeitems=1)
_notAllowedChannelNames = set(dir(recarray))
from io import open
from zipfile import is_zipfile, ZipFile
from itertools import chain
from random import choice
from string import ascii_letters
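
A short sketch of how such a CompressionPossible flag is typically consumed (an assumption for illustration, not code from mdfreader):

import numpy as np

try:
    from bcolz import carray, cparams
    CompressionPossible = True
except ImportError:
    # Cannot compress data, please install bcolz
    CompressionPossible = False

def maybe_compress(data, clevel=5):
    """Return a compressed carray when bcolz is importable, else the raw ndarray."""
    if CompressionPossible:
        return carray(data, cparams=cparams(clevel=clevel, cname='lz4'))
    return data

stored = maybe_compress(np.arange(1000, dtype='float64'))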
Example #9
    def prepare(self):
        """
        Prepare the dataloader, by storing values to static fields of this class
        In this case, only filenames are loaded prematurely
        :return:
        """
        bcolz.set_nthreads(2)

        # step 0: load only when not loaded yet
        if TRAINING in self.data and VALIDATION in self.data: return

        # step 1: load the file names
        patients = sorted(glob.glob(self.location+'/*.*/'))
        print len(patients), "patients"

        # make a stratified validation set
        # note, the seed decides the validation set, but it is deterministic in the names
        random.seed(317070)
        patient_names = [self.patient_name_from_file_name(f) for f in patients]
        validation_patients = random.sample(patient_names, int(VALIDATION_SET_SIZE*len(patient_names)))

        labels_as_dict = defaultdict(list)

        with open(paths.LUNA_LABELS_PATH, 'rb') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            next(reader)  # skip the header
            for row in reader:
                label = (float(row[1]), float(row[2]), float(row[3]), float(row[4]))
                labels_as_dict[str(row[0])].append(label)

        # make the static data empty
        for s in self.datasets:
            self.data[s] = []
            self.labels[s] = []
            self.names[s] = []
            self.spacings[s] = []
            self.origins[s] = []

        with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'spacings.pkl.gz') as f:
            spacings = cPickle.load(f)

        with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'origins.pkl.gz') as f:
            origins = cPickle.load(f)

        # load the filenames and put into the right dataset
        for i, patient_folder in enumerate(patients):
            patient_id = str(patient_folder.split(path.sep)[-2])
            if patient_id in validation_patients:
                dataset = VALIDATION
            else:
                dataset = TRAIN


            label = labels_as_dict[patient_id]
            if self.only_positive and not label:
                continue

            self.data[dataset].append(patient_folder)
            self.labels[dataset].append(label)
            self.names[dataset].append(patient_id)
            self.spacings[dataset].append(spacings[patient_id])
            self.origins[dataset].append(origins[patient_id])

        # give every patient a unique number
        last_index = -1
        for set in self.datasets:
            self.indices[set] = range(last_index+1,last_index+1+len(self.data[set]))
            if len(self.indices[set]) > 0:
                last_index = self.indices[set][-1]
            print set, len(self.indices[set]), "samples"
Example #11
def main():
    ###############################################################################################################
    # INITIALIZE
    ###############################################################################################################

    args = parse_args()
    cfg_name = args.config.split("/")[-1][:-3]
    expid = "%s-%s-%s" % (cfg_name, platform.node(),
                          strftime("%Y%m%d-%H%M%S", localtime()))
    cfg = importlib.import_module("models.%s" % cfg_name)
    is_resume = args.resume != ""
    if is_resume:
        meta = cPickle.load(open(args.resume, "rb"))
        cfg_name = meta["configuration"]
        expid = meta["experiment_id"]
        params = meta["parameters"]
    train_loss = meta["train_loss"] if is_resume else []
    valid_loss = meta["valid_loss"] if is_resume else []
    start_epoch = meta["epoch"] if is_resume else -1
    if is_resume: resume_learning_rate(cfg, start_epoch)

    if not os.path.exists("metadata/"): os.mkdir("metadata")

    log = TrainLogger("metadata/%s.log" % expid)
    log.print_to_log()

    print "Model:", cfg_name, "expid:", expid
    print "batch_size", cfg.batch_size, "batches_per_chunk", cfg.batches_per_chunk, \
        "learning_rate", cfg.learning_rate, "validate_every_n_chunks", cfg.validate_every_n_chunks, \
        "n_valid_chunks", cfg.n_valid_chunks

    cfg.data_loader.start(n_jobs=args.jobs)

    ###############################################################################################################
    # BUILD
    ###############################################################################################################

    print "Building model ..."
    model = cfg.build_model()
    if is_resume:
        nn.layers.helper.set_all_param_values(model["output"], params)
    if hasattr(cfg, "is_pretrained") and cfg.is_pretrained:
        cfg.set_pretrained_params(model)
    all_layers = nn.layers.get_all_layers(model["output"])
    all_params = nn.layers.get_all_params(model["output"], trainable=True)
    print_model(
        all_layers, all_params
    )  #, {"WeightNormLayer", "LayerNormLayer", "BatchNormLayer", "NonlinearityLayer"})

    print "Building objectives ..."
    loss = cfg.build_objectives(model, deterministic=False)
    eval_outputs = cfg.build_objectives(model,
                                        deterministic=True)  # ordered dict

    print "Building updates ..."
    learning_rate = theano.shared(np.float32(cfg.learning_rate))
    all_grads = theano.grad(loss, all_params, disconnected_inputs='warn')
    updates = cfg.build_updates(all_grads, all_params, learning_rate)

    print "Compiling training function"
    train_outputs = [loss]
    train_outputs += cfg.toprint.values()  #OrderedDict!

    batch_idx = T.iscalar('idx')
    data_shared = OrderedDict({
        tag: nn.utils.shared_empty(len(arr.shape) + 1, arr.dtype)
        for (tag, arr) in cfg.data_tags.items()
    })
    givens = OrderedDict()
    for (tag, l) in model["input"].items():
        givens[l.input_var] = data_shared[tag][batch_idx *
                                               cfg.batch_size:(batch_idx + 1) *
                                               cfg.batch_size]

    iter_train = theano.function([batch_idx],
                                 train_outputs,
                                 updates=updates,
                                 givens=givens)

    print "Compiling validation function"
    iter_valid = theano.function([batch_idx],
                                 eval_outputs.values(),
                                 givens=givens)

    ###############################################################################################################
    # PREPARE
    ###############################################################################################################

    if hasattr(cfg, "preparation") and not is_resume:
        print "Preparing model (weight init etc)..."
        t_init = time()
        cfg.preparation(model, batch_idx, givens, data_shared)
        print "  took %.3f seconds" % (time() - t_init, )


    ###############################################################################################################
    # TRAIN
    ###############################################################################################################

    print "Training ..."

    if hasattr(args, "bcolz"):
        import bcolz
        bcolz.set_nthreads(args.bcolz)
    train_time, total_time = 0, time()
    start_time = time()
    chunk_size = cfg.batch_size * cfg.batches_per_chunk
    n_epochs = int(cfg.n_updates /
                   float(cfg.validate_every_n_chunks * cfg.batches_per_chunk))
    chunk_loss = []
    valid_jac = []
    for epoch in xrange(start_epoch + 1, n_epochs):

        # EVALUATION

        if epoch != 0:
            # if True:
            for loss_list, set_ in ((valid_loss, VALID), (train_loss, TRAIN)):
                print "Evaluating %s set..." % set_
                valid_time = time()
                chunk_gen = cfg.data_loader.chunk_generator(
                    n_chunks=cfg.n_valid_chunks,
                    chunk_size=chunk_size,
                    set=set_)

                chunk_res = OrderedDict(
                    {tag: []
                     for tag in eval_outputs.keys()})
                chunk_res["JI"] = []

                for c, chunk in enumerate(chunk_gen):
                    for key in data_shared:
                        data_shared[key].set_value(
                            chunk[key], borrow=cfg.borrow_shared_data)

                    for b in range(cfg.batches_per_chunk):
                        iter_valid_result = iter_valid(b)
                        for i, tag in enumerate(eval_outputs.keys()):
                            chunk_res[tag].append(iter_valid_result[i])

                    preds = np.vstack(
                        chunk_res["_preds"][-cfg.batches_per_chunk:]).reshape(
                            (-1, cfg.n_frames))[:,
                                                4:-4].flatten().astype("int32")
                    preds = np.eye(249)[preds]
                    targets = chunk["target"][:, 4:-4].flatten()
                    targets = np.eye(249)[targets]

                    intersection = np.sum(targets * preds, axis=0)
                    tarsum = np.sum(targets, axis=0)
                    union = tarsum + np.sum(preds, axis=0) - intersection
                    ji = intersection / (union + 1e-9)
                    ji = ji.sum() / np.count_nonzero(tarsum)
                    # print preds.shape, targets.shape, preds, targets
                    # ji = jaccard_similarity_score(targets, preds)
                    # print preds.shape, preds.dtype, targets.shape, targets.dtype, ji, np.count_nonzero(tarsum)
                    chunk_res["JI"].append(ji)

                    if hasattr(cfg, "evaluate"):
                        cfg.evaluate(chunk_res, chunk, expid, set_)

                loss_list.append(np.mean(chunk_res["loss"]))
                if set_ == VALID: valid_jac.append(np.mean(chunk_res["JI"]))

                toprint = "best=%.3f" % (np.min(loss_list) if set_ == TRAIN
                                         else np.max(valid_jac))
                for tag, res in chunk_res.items():
                    if tag.startswith("_"): continue
                    toprint += " %s=%.3f" % (tag, np.mean(res))

                print toprint

                # chunk_res["_preds"]

        # SAVING PARAMS

        if epoch != 0 and valid_jac[-1] == np.max(valid_jac):
            # if valid_jac[-1] == np.max(valid_jac):
            try:
                metadata_tmp_path = "/var/tmp/%s.pkl" % expid
                metadata_target_path = "metadata/%s.pkl" % expid
                print "Saving in", metadata_target_path

                with open(metadata_tmp_path, 'w') as f:
                    cPickle.dump(
                        {
                            'configuration':
                            cfg_name,
                            'experiment_id':
                            expid,
                            'train_loss':
                            train_loss,
                            'valid_loss':
                            valid_loss,
                            'parameters':
                            nn.layers.get_all_param_values(model["output"]),
                            'epoch':
                            epoch,
                        }, f, cPickle.HIGHEST_PROTOCOL)

                try:
                    shutil.move(metadata_tmp_path, metadata_target_path)
                except Exception as e:
                    print e
            except:
                print "saving failed"

        if epoch != 0:
            plot_progress(train_loss, valid_loss,
                          "metadata/%s--plot.pdf" % expid)
            print "Evaluation time:%.3fs" % (time() - valid_time)

        # TRAINING

        chunk_gen = cfg.data_loader.chunk_generator(
            n_chunks=cfg.validate_every_n_chunks,
            chunk_size=chunk_size,
            set=TRAIN)

        for c, chunk in enumerate(chunk_gen):
            for key in data_shared:
                data_shared[key].set_value(chunk[key],
                                           borrow=cfg.borrow_shared_data)

            total_n_chunks = epoch * cfg.validate_every_n_chunks + c
            if total_n_chunks % cfg.print_every_n_chunks == 0:
                sys.stdout.write("\r" + " " * 100 + "\r")
                sys.stdout.flush()
                log.print_to_log()
                print "Chunk %i updates %i samples %i lr %.2e time %s" % \
                      (total_n_chunks,
                       total_n_chunks * cfg.batches_per_chunk,
                       total_n_chunks * chunk_size,
                       learning_rate.get_value(),
                       secs_to_str(time() - start_time))
                total_time = stopwatch(total_time)
                print "Time / sample = %.3fms (%.3fms + %.3fms overhead)" % \
                      (total_time / (cfg.print_every_n_chunks * chunk_size),
                       train_time / (cfg.print_every_n_chunks * chunk_size),
                       (total_time - train_time) / (cfg.print_every_n_chunks * chunk_size))
                train_time, total_time = 0, time()
                val_loss = valid_jac[-1] if len(valid_jac) > 0 else np.inf
                min_val_loss = np.max(
                    valid_jac) if len(valid_jac) > 0 else np.inf
                print "Train loss = %.3f,  Valid loss = %.3f (best: %.3f)\n" % \
                      (np.mean(chunk_loss), val_loss, min_val_loss)
                chunk_loss = []

            log.only_print_to_console()

            batch_loss = []
            for b in range(cfg.batches_per_chunk):
                t0 = time()
                iter_train_result = iter_train(b)
                train_time += stopwatch(t0)

                batch_loss.append(iter_train_result[0])

                # learning decay
                new_lr = cfg.learning_rate * calculate_lr_decay(
                    cfg, epoch, c, b)
                learning_rate.set_value(np.float32(new_lr))

                toprint = "\r" + " " * 100 + "\rl=%.4f" % (batch_loss[-1], )
                for i, res in enumerate(iter_train_result[1:]):
                    toprint += " %s=%.3f" % (cfg.toprint.keys()[i], res)
                sys.stdout.write(toprint)
                sys.stdout.flush()

            chunk_loss.append(np.mean(batch_loss))

            detect_nans(chunk_loss[-1], all_params, data_shared)

        # end of training per epoch

        sys.stdout.write("\r" + " " * 100 + "\r")
        sys.stdout.flush()
        log.print_to_log()
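
The per-class Jaccard index computed inside the validation loop above can be checked on a toy prediction/target pair; a minimal sketch with 3 classes instead of the 249 used in the snippet (same arithmetic otherwise):

import numpy as np

n_classes = 3
preds_idx = np.array([0, 1, 1, 2], dtype='int32')    # predicted class per frame
target_idx = np.array([0, 1, 2, 2], dtype='int32')   # true class per frame

preds = np.eye(n_classes)[preds_idx]                 # one-hot, shape (frames, classes)
targets = np.eye(n_classes)[target_idx]

intersection = np.sum(targets * preds, axis=0)       # per-class true positives
tarsum = np.sum(targets, axis=0)
union = tarsum + np.sum(preds, axis=0) - intersection
ji = intersection / (union + 1e-9)                   # per-class Jaccard
ji = ji.sum() / np.count_nonzero(tarsum)             # mean over classes present in targets
print(ji)                                            # ~0.67 for this toy example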
Example #12
def main():
    ###############################################################################################################
    # INITIALIZE
    ###############################################################################################################

    args = parse_args()
    set_ = args.set
    meta = cPickle.load(open(args.meta, "rb"))
    cfg_name = meta["configuration"]
    expid = meta["experiment_id"] + "--" + set_
    params = meta["parameters"]

    cfg = importlib.import_module("models.%s" % cfg_name)

    log = TrainLogger("metadata/%s.log" % expid)
    log.print_to_log()

    print "\n"
    print "EVALUATING", set_, "SET!"
    print "\n"
    print "Model:", cfg_name, "expid:", expid
    print "batch_size", cfg.batch_size, "batches_per_chunk", cfg.batches_per_chunk, \
        "learning_rate", cfg.learning_rate, "validate_every_n_chunks", cfg.validate_every_n_chunks, \
        "n_valid_chunks", cfg.n_valid_chunks

    data_path = cfg.data_loader.data_path
    print data_path
    vid_meta = cPickle.load(open("./data/vidmeta.pkl", "rb"))
    # print vid_meta

    vid_paths = glob(data_path + "*/*/*")
    vid_paths.sort()
    print len(vid_paths), len(vid_meta)
    set_vid_idxs = [i for i, p in enumerate(vid_paths) if set_ in p]
    print len(set_vid_idxs), set_, "videos"

    # sys.exit()

    cfg.data_loader.predict = True
    cfg.data_loader.start(n_jobs=args.jobs)

    ###############################################################################################################
    # BUILD
    ###############################################################################################################

    print "Building model ..."
    model = cfg.build_model()
    nn.layers.helper.set_all_param_values(model["output"], params)
    if hasattr(cfg, "is_pretrained") and cfg.is_pretrained:
        cfg.set_pretrained_params(model)
    all_layers = nn.layers.get_all_layers(model["output"])
    all_params = nn.layers.get_all_params(model["output"], trainable=True)
    print_model(
        all_layers, all_params
    )  #, {"WeightNormLayer", "LayerNormLayer", "BatchNormLayer", "NonlinearityLayer"})
    model_out = nn.layers.get_output(model["output"], deterministic=True)

    # batch_idx = T.iscalar('idx')
    # data_shared = OrderedDict({tag: nn.utils.shared_empty(len(arr.shape)+1, arr.dtype)
    #                            for (tag, arr) in cfg.data_tags.items()})
    # givens = OrderedDict()
    # for (tag, l) in model["input"].items():
    inp = model["input"]["video"]
    # givens[inp.input_var] = data_shared["video"][batch_idx*cfg.batch_size : (batch_idx+1)*cfg.batch_size]

    # print model["input"].keys()
    print "Compiling evaluation function"
    iter_eval = theano.function([inp.input_var], [model_out])

    ###############################################################################################################
    # PREDICT
    ###############################################################################################################

    print "Predicting ..."

    import bcolz
    if hasattr(args, "bcolz"):
        bcolz.set_nthreads(args.bcolz)

    chunk_size = 1  #cfg.batch_size * cfg.batches_per_chunk
    cut_off = 8

    preprocessors = cfg.data_loader.preprocessors
    vidprep = None
    classperframe = None
    for p in preprocessors:
        if "VideoLoadPrep" == p.__class__.__name__:
            vidprep = p
        elif "ClassPerFrame" == p.__class__.__name__:
            classperframe = p

    print "Evaluating %s set..." % set_

    stride = cfg.n_frames - cut_off * 2
    import scipy.stats
    import string

    s_preds = []

    pred_dir = "./predictions/"
    if not os.path.exists(pred_dir): os.mkdir(pred_dir)
    # pred_file = open(pred_dir+expid+"txt", "w")

    for i, vid_idx in enumerate(set_vid_idxs):
        path = vid_paths[vid_idx]
        print path

        reader = bcolz.open(path, mode="r")
        max_frames = reader.shape[0]

        # max_frames = vid_meta[vid_idx]["max_frames"]
        # n_chunks = int(np.ceil((max_frames-cut_off) / float(stride)))
        s_pred = []
        j = 0
        while True:
            start_frame = j * stride
            end_frame = start_frame + cfg.n_frames
            if j != 0 and end_frame - cut_off >= max_frames: break
            j += 1

            fragment, start, end = vidprep.get_fragment(start_frame,
                                                        reader,
                                                        push_start=False)
            fraglen = len(fragment)
            if end_frame >= max_frames:
                in_vid = np.zeros((cfg.n_frames, ) + cfg.im_shp, "float32")
                in_vid[:fraglen] = fragment
            else:
                in_vid = fragment

            in_vid.shape = (1, ) + (cfg.n_frames, ) + cfg.im_shp
            iter_result = iter_eval(in_vid)[0]
            if end_frame >= max_frames:
                cut_end = fraglen
            else:
                cut_end = cfg.n_frames - cut_off
            preds = np.argmax(iter_result[cut_off:cut_end], axis=1)
            # print start, end, preds
            s_pred.append(preds)
        del reader
        s_pred = np.hstack(s_pred)
        if len(s_pred) > 0:
            s_pred = np.hstack((np.repeat(s_pred[0],
                                          cut_off), s_pred)).astype("int32")
            if len(s_pred) != max_frames:
                last = scipy.stats.mode(s_pred[:-8])[0]
                s_pred = np.hstack(
                    (s_pred, np.repeat(last, max_frames - len(s_pred))))
            # print path, max_frames, len(s_pred), s_pred
            # print classperframe.framewise_lbls[vid_idx]
            # print
        else:
            print
            print max_frames, len(s_pred)
            print

        s_preds.append(s_pred)

        # pred_file.write(string.join(path.split("/")[-2:], "/"))
        # begin = 1
        # prev = None
        # for f, p in enumerate(s_pred):
        #     if prev is None: prev = p
        #     elif prev != p:
        #         pred_file.write(" %i,%i:%i"%(begin, f, p+1))
        #         begin = f+1
        #         prev = p
        #     elif f == len(s_pred-1):
        #         pred_file.write(" %i,%i:%i" % (begin, f+1, p + 1))
        #
        # pred_file.write("\n")
        try:
            assert max_frames == len(s_pred)
        except:
            print "\n\t !!! ", max_frames, len(s_pred), "\n"

    cPickle.dump(s_preds,
                 open(pred_dir + expid + ".pkl", "wb"),
                 protocol=cPickle.HIGHEST_PROTOCOL)

    # pred_file.close()

    sys.exit()

    chunk_gen = cfg.data_loader.chunk_generator(n_chunks="all",
                                                chunk_size=chunk_size,
                                                set=set_)

    jaccard = []
    preds = []

    for c, chunk in enumerate(chunk_gen):
        for key in data_shared:
            data_shared[key].set_value(chunk[key],
                                       borrow=cfg.borrow_shared_data)

        for b in range(cfg.batches_per_chunk):
            iter_result = iter_eval(b)[0]
            print iter_result.shape
            preds.append(iter_result)

        p = np.vstack(preds[-cfg.batches_per_chunk:]).reshape(
            (-1, cfg.n_frames))
        jaccard.append(calc_ji(p, chunk["target"]))
        print c, jaccard[-1]

    print "mean JI =", np.mean(jaccard)