def _init_ctable(self, path):
    """Create an empty persistent ctable for the given path.

    Used to obtain / create / append to / set attributes on the ctable.
    Relevant ctable methods:
      addcol(newcol[, name, pos, move]) -- add newcol as a new column.
      append(cols)                      -- append cols to this ctable.
      flush()                           -- flush data in internal buffers to disk.
    flush() should typically be called after performing modifications
    (__setitem__(), append()) in persistent mode; otherwise part of the
    modifications may be lost.

    Parameters
    ----------
    path : string
        The path to the rootdir of the new ctable.
    """
    bcolz_dir = os.path.dirname(path)
    print('bcolz_dir', bcolz_dir)
    if not os.path.exists(bcolz_dir):
        os.makedirs(bcolz_dir)
    print('path', path)

    initial_array = np.empty(0, np.uint32)

    # Configure bcolz threading (Num is a module-level multiplier defined elsewhere).
    bcolz.set_nthreads(Num * bcolz.detect_number_of_cores())
    # Print the versions of the packages that bcolz relies on.
    bcolz.print_versions()

    # cparams reference (from the bcolz docs):
    #   clevel : int (0 <= clevel < 10)
    #       The compression level.
    #   shuffle : int
    #       The shuffle filter: bcolz.NOSHUFFLE (0), bcolz.SHUFFLE (1) or
    #       bcolz.BITSHUFFLE (2). The default is bcolz.SHUFFLE.
    #   cname : string ('blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd')
    #       The compressor to use inside Blosc.
    #   quantize : int (number of significant digits)
    #       Quantize data to improve (lossy) compression, using
    #       np.around(scale * data) / scale with scale = 2 ** bits, where
    #       bits is determined from the quantize value (e.g. quantize=1
    #       gives bits=4). 0 disables quantization.
    #   default: cparams(clevel=5, shuffle=1, cname='lz4', quantize=0)
    params = bcolz.cparams(clevel=9)
    table = bcolz.ctable(
        rootdir=path,
        columns=[
            initial_array, initial_array, initial_array, initial_array,
            initial_array, initial_array, initial_array,
        ],
        names=self._bcolz_fields,
        mode='w',
        cparams=params,
    )
    print('cparams', table.cparams)
    table.flush()
    table = self._init_attr(table, path)
    # table.attrs['metadata'] = self._init_metadata(path)
    return table
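For context, here is a minimal sketch of reopening and appending to a ctable created this way; the rootdir path is hypothetical, and the seven columns mirror the self._bcolz_fields layout above:

import bcolz
import numpy as np

# Reopen the persistent ctable in append mode (path is hypothetical).
table = bcolz.open('/data/bcolz/my_table', mode='a')

# append(cols) takes one array per column -- seven uint32 columns here.
new_rows = [np.arange(3, dtype=np.uint32) for _ in range(7)]
table.append(new_rows)

# Flush after modifying a persistent ctable, or the appends may be lost.
table.flush()
print(len(table), table.names)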
def cli(ctx):
    import socket
    ctx.obj['HOSTNAME'] = socket.gethostname()

    import bcolz
    bcolz.set_nthreads(1)

    # logging.basicConfig(stream=sys.stdout, level='DEBUG' if debug else 'INFO')
    # logger.error('Debug mode is %s' % ('on' if debug else 'off'))
    pd.set_option('display.max_rows', 100000000)
    pd.set_option('display.max_columns', 100000000)
    pd.set_option('display.width', 100000000)
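A side note on the call above: bcolz.set_nthreads() returns the previous setting, so Blosc can be pinned to one thread temporarily and restored afterwards; a minimal sketch:

import bcolz

old_nthreads = bcolz.set_nthreads(1)  # returns the previous thread count
try:
    pass  # ... single-threaded bcolz work here ...
finally:
    bcolz.set_nthreads(old_nthreads)  # restore the original setting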
def __init__(self, transformer, gen_prefix, do_augment_data=False):
    self.transformer = transformer
    self.gen_prefix = gen_prefix
    self.do_augment_data = do_augment_data

    man = get_manager()
    self.data_path = man.samples_path(self.transformer.game, gen_prefix)
    self.summary_path = os.path.join(self.data_path, "gendata_summary.json")

    self.summary = self.get_summary()
    self.save_summary_file()

    bcolz.set_nthreads(4)
def __init__(self, transformer, gen_prefix, do_augment_data=False,
             data_augment_pct=1.0, score_draw_as_random_hack=False):
    self.transformer = transformer
    self.gen_prefix = gen_prefix
    self.do_augment_data = do_augment_data
    self.data_augment_pct = data_augment_pct
    self.score_draw_as_random_hack = score_draw_as_random_hack

    man = get_manager()
    self.data_path = man.samples_path(self.transformer.game, gen_prefix)
    self.summary_path = os.path.join(self.data_path, "gendata_summary.json")

    self.summary = self.get_summary()
    self.save_summary_file()

    bcolz.set_nthreads(4)
import os
import sys

import bcolz
import numpy as np
from glob import glob
from moviepy.editor import VideoFileClip

sys.path.append(".")
import paths
from utils import rgb2gray
from utils.plt import show_animate

bcolz.set_nthreads(4)


def main():
    dst_dir = paths.CON_PREP
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    dump(dst_dir + "train/", glob(paths.CON_VID_TRAIN))
    dump(dst_dir + "valid/", glob(paths.CON_VID_VALID))
    dump(dst_dir + "test/", glob(paths.CON_VID_TEST))


def dump(dst_dir, vid_paths):
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    vid_paths.sort()
    print dst_dir, len(vid_paths)
    vid_paths = [p for p in vid_paths if p[-5] == "K"]
    # print len(vid_paths)
    for i, path in enumerate(vid_paths):
        class_dir = dst_dir + path.split("/")[-2] + "/"
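The dump() loop is truncated above; for reference, a minimal sketch of the write-side pattern it presumably builds up to, saving a frame array as a compressed on-disk carray (shape and path are hypothetical):

import bcolz
import numpy as np

frames = np.zeros((100, 64, 64), dtype="uint8")  # hypothetical grayscale clip
c = bcolz.carray(frames, rootdir="/tmp/demo_clip.bcolz", mode="w",
                 cparams=bcolz.cparams(clevel=9))
c.flush()  # persist to disk

reloaded = bcolz.open("/tmp/demo_clip.bcolz", mode="r")
print(reloaded.shape)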
import binascii
import logging
import os
import socket
from ssl import SSLError

import bcolz
import zmq

import bqueryd
from bqueryd.messages import msg_factory, WorkerRegisterMessage, ErrorMessage, BusyMessage, StopMessage, \
    DoneMessage, TicketDoneMessage
from bqueryd.tool import rm_file_or_dir

DATA_FILE_EXTENSION = '.bcolz'
DATA_SHARD_FILE_EXTENSION = '.bcolzs'
# Timeout in ms: how long to wait for a network poll; this also affects how
# often new controllers and data files are noticed.
POLLING_TIMEOUT = 5000
# How often (in seconds) to send a WorkerRegisterMessage
WRM_DELAY = 20
MAX_MEMORY_KB = 2 * (2 ** 20)  # max memory of 2 GB, in kilobytes
DOWNLOAD_DELAY = 5  # how often (in seconds) to check for downloads

bcolz.set_nthreads(1)


class WorkerBase(object):
    def __init__(self, data_dir=bqueryd.DEFAULT_DATA_DIR, redis_url='redis://127.0.0.1:6379/0',
                 loglevel=logging.DEBUG, restart_check=True, azure_conn_string=None):
        if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
            raise Exception("Datadir %s is not a valid directory" % data_dir)
        self.worker_id = binascii.hexlify(os.urandom(8))
        self.node_name = socket.gethostname()
        self.data_dir = data_dir
        self.data_files = set()
        self.restart_check = restart_check
        context = zmq.Context()
        self.socket = context.socket(zmq.ROUTER)
        self.socket.setsockopt(zmq.LINGER, 500)
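This excerpt does not show how MAX_MEMORY_KB is enforced; the helper below is a hypothetical sketch (not bqueryd's actual implementation) of one way a Linux worker could check its resident memory against the cap:

def under_memory_cap(max_memory_kb=MAX_MEMORY_KB):
    """Hypothetical check: is this process's resident set size under the cap? (Linux only)"""
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('VmRSS:'):
                rss_kb = int(line.split()[1])  # VmRSS is reported in kB
                return rss_kb < max_memory_kb
    return True  # usage unknown; assume OK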
def prepare(self):
    """
    Prepare the dataloader by storing values to static fields of this class.
    In this case, only the filenames are loaded prematurely.
    :return:
    """
    bcolz.set_nthreads(2)

    # step 0: load only when not loaded yet
    if TRAINING in self.data and VALIDATION in self.data:
        return

    # step 1: load the file names
    patients = sorted(glob.glob(self.location + '/*/'))
    print len(patients), "patients"
    # sys.exit()

    labels = dict()
    with open(paths.LABELS_PATH, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader)  # skip the header
        for row in reader:
            labels[str(row[0])] = int(row[1])

    # make a stratified validation set
    # note: the seed decides the validation set, but it is deterministic in the file_names and labels
    random.seed(317070)
    ids_per_label = [[patient_id for patient_id, label in labels.iteritems() if label == l]
                     for l in [0, 1]]
    validation_patients = sum([random.sample(sorted(ids), int(VALIDATION_SET_SIZE * len(ids)))
                               for ids in ids_per_label], [])

    if self.use_luna:
        luna_labels = load_luna_labels(patients)
        print len(luna_labels), "luna labels added"
        labels.update(luna_labels)

    # make the static data empty
    for s in self.datasets:
        self.data[s] = []
        self.labels[s] = []
        self.names[s] = []
        self.spacings[s] = []

    with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'spacings.pkl.gz') as f:
        spacings = cPickle.load(f)

    # load the filenames and put them into the right dataset
    for i, patient_folder in enumerate(patients):
        patient_id = str(patient_folder.split(path.sep)[-2])
        if patient_id in labels:
            if patient_id in validation_patients:
                dataset = VALIDATION
            else:
                dataset = TRAIN
        else:
            dataset = TEST

        self.data[dataset].append(patient_folder)
        if patient_id in labels:
            self.labels[dataset].append(labels[patient_id])
        self.names[dataset].append(patient_id)
        self.spacings[dataset].append(spacings[patient_id])

    # give every patient a unique number
    last_index = -1
    for set in self.datasets:
        self.indices[set] = range(last_index + 1, last_index + 1 + len(self.data[set]))
        if len(self.indices[set]) > 0:
            last_index = self.indices[set][-1]
        print set, len(self.indices[set]), "samples"
:Author: `Aymeric Rateau <https://github.com/ratal/mdfreader>`__

Dependencies
-------------------
- Python >2.6, >3.2 <http://www.python.org>
- Numpy >1.6 <http://numpy.scipy.org>

mdf module
--------------------------
"""
try:
    CompressionPossible = True
    from bcolz import cparams, carray, detect_number_of_cores, set_nthreads
    _ncores = detect_number_of_cores()
    set_nthreads(_ncores)
    from blosc import decompress_ptr, compress_ptr
except ImportError:
    # Cannot compress data; please install bcolz and blosc.
    CompressionPossible = False

from pandas import set_option
from collections import OrderedDict, defaultdict
from numpy import array_repr, set_printoptions, recarray, empty

set_printoptions(threshold=100, edgeitems=1)
_notAllowedChannelNames = set(dir(recarray))

from io import open
from zipfile import is_zipfile, ZipFile
from itertools import chain
from random import choice
from string import ascii_letters
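To illustrate what the guarded imports are for, here is a minimal compress/decompress round trip with python-blosc's pointer API; this is a generic sketch of the pattern, not mdfreader's exact code:

from numpy import arange, empty
from blosc import compress_ptr, decompress_ptr

data = arange(1000, dtype='float64')
packed = compress_ptr(data.__array_interface__['data'][0],
                      data.size, data.dtype.itemsize,
                      clevel=9, shuffle=True, cname='blosclz')

restored = empty(data.shape, dtype=data.dtype)
decompress_ptr(packed, restored.__array_interface__['data'][0])
assert (data == restored).all()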
def prepare(self):
    """
    Prepare the dataloader by storing values to static fields of this class.
    In this case, only the filenames are loaded prematurely.
    :return:
    """
    bcolz.set_nthreads(2)

    # step 0: load only when not loaded yet
    if TRAINING in self.data and VALIDATION in self.data:
        return

    # step 1: load the file names
    patients = sorted(glob.glob(self.location + '/*.*/'))
    print len(patients), "patients"

    # make a stratified validation set
    # note: the seed decides the validation set, but it is deterministic in the names
    random.seed(317070)
    patient_names = [self.patient_name_from_file_name(f) for f in patients]
    validation_patients = random.sample(patient_names, int(VALIDATION_SET_SIZE * len(patient_names)))

    labels_as_dict = defaultdict(list)
    with open(paths.LUNA_LABELS_PATH, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader)  # skip the header
        for row in reader:
            label = (float(row[1]), float(row[2]), float(row[3]), float(row[4]))
            labels_as_dict[str(row[0])].append(label)

    # make the static data empty
    for s in self.datasets:
        self.data[s] = []
        self.labels[s] = []
        self.names[s] = []
        self.spacings[s] = []
        self.origins[s] = []

    with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'spacings.pkl.gz') as f:
        spacings = cPickle.load(f)
    with gzip.open(paths.INTERMEDIATE_DATA_PATH + 'origins.pkl.gz') as f:
        origins = cPickle.load(f)

    # load the filenames and put them into the right dataset
    for i, patient_folder in enumerate(patients):
        patient_id = str(patient_folder.split(path.sep)[-2])
        if patient_id in validation_patients:
            dataset = VALIDATION
        else:
            dataset = TRAIN

        label = labels_as_dict[patient_id]
        if self.only_positive and not label:
            continue

        self.data[dataset].append(patient_folder)
        self.labels[dataset].append(label)
        self.names[dataset].append(patient_id)
        self.spacings[dataset].append(spacings[patient_id])
        self.origins[dataset].append(origins[patient_id])

    # give every patient a unique number
    last_index = -1
    for set in self.datasets:
        self.indices[set] = range(last_index + 1, last_index + 1 + len(self.data[set]))
        if len(self.indices[set]) > 0:
            last_index = self.indices[set][-1]
        print set, len(self.indices[set]), "samples"
def main():
    ###############################################################################################################
    # INITIALIZE
    ###############################################################################################################
    args = parse_args()
    cfg_name = args.config.split("/")[-1][:-3]
    expid = "%s-%s-%s" % (cfg_name, platform.node(), strftime("%Y%m%d-%H%M%S", localtime()))
    cfg = importlib.import_module("models.%s" % cfg_name)

    is_resume = args.resume != ""
    if is_resume:
        meta = cPickle.load(open(args.resume, "rb"))
        cfg_name = meta["configuration"]
        expid = meta["experiment_id"]
        params = meta["parameters"]
    train_loss = meta["train_loss"] if is_resume else []
    valid_loss = meta["valid_loss"] if is_resume else []
    start_epoch = meta["epoch"] if is_resume else -1
    if is_resume:
        resume_learning_rate(cfg, start_epoch)

    if not os.path.exists("metadata/"):
        os.mkdir("metadata")
    log = TrainLogger("metadata/%s.log" % expid)
    log.print_to_log()

    print "Model:", cfg_name, "expid:", expid
    print "batch_size", cfg.batch_size, "batches_per_chunk", cfg.batches_per_chunk, \
        "learning_rate", cfg.learning_rate, "validate_every_n_chunks", cfg.validate_every_n_chunks, \
        "n_valid_chunks", cfg.n_valid_chunks

    cfg.data_loader.start(n_jobs=args.jobs)

    ###############################################################################################################
    # BUILD
    ###############################################################################################################
    print "Building model ..."
    model = cfg.build_model()
    if is_resume:
        nn.layers.helper.set_all_param_values(model["output"], params)
    if hasattr(cfg, "is_pretrained") and cfg.is_pretrained:
        cfg.set_pretrained_params(model)

    all_layers = nn.layers.get_all_layers(model["output"])
    all_params = nn.layers.get_all_params(model["output"], trainable=True)
    print_model(all_layers, all_params)  # , {"WeightNormLayer", "LayerNormLayer", "BatchNormLayer", "NonlinearityLayer"}

    print "Building objectives ..."
    loss = cfg.build_objectives(model, deterministic=False)
    eval_outputs = cfg.build_objectives(model, deterministic=True)  # ordered dict

    print "Building updates ..."
    learning_rate = theano.shared(np.float32(cfg.learning_rate))
    all_grads = theano.grad(loss, all_params, disconnected_inputs='warn')
    updates = cfg.build_updates(all_grads, all_params, learning_rate)

    print "Compiling training function"
    train_outputs = [loss]
    train_outputs += cfg.toprint.values()  # OrderedDict!

    batch_idx = T.iscalar('idx')
    data_shared = OrderedDict({
        tag: nn.utils.shared_empty(len(arr.shape) + 1, arr.dtype)
        for (tag, arr) in cfg.data_tags.items()
    })
    givens = OrderedDict()
    for (tag, l) in model["input"].items():
        givens[l.input_var] = data_shared[tag][batch_idx * cfg.batch_size:(batch_idx + 1) * cfg.batch_size]

    iter_train = theano.function([batch_idx], train_outputs, updates=updates, givens=givens)

    print "Compiling validation function"
    iter_valid = theano.function([batch_idx], eval_outputs.values(), givens=givens)

    ###############################################################################################################
    # PREPARE
    ###############################################################################################################
    if hasattr(cfg, "preparation") and not is_resume:
        print "Preparing model (weight init etc)..."
        t_init = time()
        cfg.preparation(model, batch_idx, givens, data_shared)
        print " took %.3f seconds" % (time() - t_init,)

    ###############################################################################################################
    # TRAIN
    ###############################################################################################################
    print "Training ..."
    if hasattr(args, "bcolz"):
        import bcolz
        bcolz.set_nthreads(args.bcolz)

    train_time, total_time = 0, time()
    start_time = time()
    chunk_size = cfg.batch_size * cfg.batches_per_chunk
    n_epochs = int(cfg.n_updates / float(cfg.validate_every_n_chunks * cfg.batches_per_chunk))
    chunk_loss = []
    valid_jac = []

    for epoch in xrange(start_epoch + 1, n_epochs):
        # EVALUATION
        if epoch != 0:  # if True:
            for loss_list, set_ in ((valid_loss, VALID), (train_loss, TRAIN)):
                print "Evaluating %s set..." % set_
                valid_time = time()
                chunk_gen = cfg.data_loader.chunk_generator(n_chunks=cfg.n_valid_chunks,
                                                            chunk_size=chunk_size, set=set_)
                chunk_res = OrderedDict({tag: [] for tag in eval_outputs.keys()})
                chunk_res["JI"] = []
                for c, chunk in enumerate(chunk_gen):
                    for key in data_shared:
                        data_shared[key].set_value(chunk[key], borrow=cfg.borrow_shared_data)
                    for b in range(cfg.batches_per_chunk):
                        iter_valid_result = iter_valid(b)
                        for i, tag in enumerate(eval_outputs.keys()):
                            chunk_res[tag].append(iter_valid_result[i])

                    preds = np.vstack(chunk_res["_preds"][-cfg.batches_per_chunk:]).reshape(
                        (-1, cfg.n_frames))[:, 4:-4].flatten().astype("int32")
                    preds = np.eye(249)[preds]
                    targets = chunk["target"][:, 4:-4].flatten()
                    targets = np.eye(249)[targets]
                    intersection = np.sum(targets * preds, axis=0)
                    tarsum = np.sum(targets, axis=0)
                    union = tarsum + np.sum(preds, axis=0) - intersection
                    ji = intersection / (union + 1e-9)
                    ji = ji.sum() / np.count_nonzero(tarsum)
                    # print preds.shape, targets.shape, preds, targets
                    # ji = jaccard_similarity_score(targets, preds)
                    # print preds.shape, preds.dtype, targets.shape, targets.dtype, ji, np.count_nonzero(tarsum)
                    chunk_res["JI"].append(ji)

                if hasattr(cfg, "evaluate"):
                    cfg.evaluate(chunk_res, chunk, expid, set_)

                loss_list.append(np.mean(chunk_res["loss"]))
                if set_ == VALID:
                    valid_jac.append(np.mean(chunk_res["JI"]))
                toprint = "best=%.3f" % (np.min(loss_list) if set_ == TRAIN else np.max(valid_jac))
                for tag, res in chunk_res.items():
                    if tag.startswith("_"):
                        continue
                    toprint += " %s=%.3f" % (tag, np.mean(res))
                print toprint  # chunk_res["_preds"]

        # SAVING PARAMS
        if epoch != 0 and valid_jac[-1] == np.max(valid_jac):  # if valid_jac[-1] == np.max(valid_jac):
            try:
                metadata_tmp_path = "/var/tmp/%s.pkl" % expid
                metadata_target_path = "metadata/%s.pkl" % expid
                print "Saving in", metadata_target_path
                with open(metadata_tmp_path, 'w') as f:
                    cPickle.dump({
                        'configuration': cfg_name,
                        'experiment_id': expid,
                        'train_loss': train_loss,
                        'valid_loss': valid_loss,
                        'parameters': nn.layers.get_all_param_values(model["output"]),
                        'epoch': epoch,
                    }, f, cPickle.HIGHEST_PROTOCOL)
                try:
                    shutil.move(metadata_tmp_path, metadata_target_path)
                except Exception as e:
                    print e
            except:
                print "saving failed"

        if epoch != 0:
            plot_progress(train_loss, valid_loss, "metadata/%s--plot.pdf" % expid)
            print "Evaluation time:%.3fs" % (time() - valid_time)

        # TRAINING
        chunk_gen = cfg.data_loader.chunk_generator(n_chunks=cfg.validate_every_n_chunks,
                                                    chunk_size=chunk_size, set=TRAIN)
        for c, chunk in enumerate(chunk_gen):
            for key in data_shared:
                data_shared[key].set_value(chunk[key], borrow=cfg.borrow_shared_data)

            total_n_chunks = epoch * cfg.validate_every_n_chunks + c
            if total_n_chunks % cfg.print_every_n_chunks == 0:
                sys.stdout.write("\r" + " " * 100 + "\r")
                sys.stdout.flush()
                log.print_to_log()
                print "Chunk %i updates %i samples %i lr %.2e time %s" % \
                    (total_n_chunks, total_n_chunks * cfg.batches_per_chunk, total_n_chunks * chunk_size,
                     learning_rate.get_value(), secs_to_str(time() - start_time))
                total_time = stopwatch(total_time)
                print "Time / sample = %.3fms (%.3fms + %.3fms overhead)" % \
                    (total_time / (cfg.print_every_n_chunks * chunk_size),
                     train_time / (cfg.print_every_n_chunks * chunk_size),
                     (total_time - train_time) / (cfg.print_every_n_chunks * chunk_size))
                train_time, total_time = 0, time()

                val_loss = valid_jac[-1] if len(valid_jac) > 0 else np.inf
                min_val_loss = np.max(valid_jac) if len(valid_jac) > 0 else np.inf
                print "Train loss = %.3f, Valid loss = %.3f (best: %.3f)\n" % \
                    (np.mean(chunk_loss), val_loss, min_val_loss)
                chunk_loss = []
                log.only_print_to_console()

            batch_loss = []
            for b in range(cfg.batches_per_chunk):
                t0 = time()
                iter_train_result = iter_train(b)
                train_time += stopwatch(t0)
                batch_loss.append(iter_train_result[0])

                # learning decay
                new_lr = cfg.learning_rate * calculate_lr_decay(cfg, epoch, c, b)
                learning_rate.set_value(np.float32(new_lr))

                toprint = "\r" + " " * 100 + "\rl=%.4f" % (batch_loss[-1],)
                for i, res in enumerate(iter_train_result[1:]):
                    toprint += " %s=%.3f" % (cfg.toprint.keys()[i], res)
                sys.stdout.write(toprint)
                sys.stdout.flush()

            chunk_loss.append(np.mean(batch_loss))
            detect_nans(chunk_loss[-1], all_params, data_shared)

        # end of training per epoch
        sys.stdout.write("\r" + " " * 100 + "\r")
        sys.stdout.flush()
        log.print_to_log()
def main():
    ###############################################################################################################
    # INITIALIZE
    ###############################################################################################################
    args = parse_args()
    set_ = args.set

    meta = cPickle.load(open(args.meta, "rb"))
    cfg_name = meta["configuration"]
    expid = meta["experiment_id"] + "--" + set_
    params = meta["parameters"]
    cfg = importlib.import_module("models.%s" % cfg_name)

    log = TrainLogger("metadata/%s.log" % expid)
    log.print_to_log()
    print "\n"
    print "EVALUATING", set_, "SET!"
    print "\n"
    print "Model:", cfg_name, "expid:", expid
    print "batch_size", cfg.batch_size, "batches_per_chunk", cfg.batches_per_chunk, \
        "learning_rate", cfg.learning_rate, "validate_every_n_chunks", cfg.validate_every_n_chunks, \
        "n_valid_chunks", cfg.n_valid_chunks

    data_path = cfg.data_loader.data_path
    print data_path
    vid_meta = cPickle.load(open("./data/vidmeta.pkl", "rb"))
    # print vid_meta
    vid_paths = glob(data_path + "*/*/*")
    vid_paths.sort()
    print len(vid_paths), len(vid_meta)
    set_vid_idxs = [i for i, p in enumerate(vid_paths) if set_ in p]
    print len(set_vid_idxs), set_, "videos"
    # sys.exit()

    cfg.data_loader.predict = True
    cfg.data_loader.start(n_jobs=args.jobs)

    ###############################################################################################################
    # BUILD
    ###############################################################################################################
    print "Building model ..."
    model = cfg.build_model()
    nn.layers.helper.set_all_param_values(model["output"], params)
    if hasattr(cfg, "is_pretrained") and cfg.is_pretrained:
        cfg.set_pretrained_params(model)

    all_layers = nn.layers.get_all_layers(model["output"])
    all_params = nn.layers.get_all_params(model["output"], trainable=True)
    print_model(all_layers, all_params)  # , {"WeightNormLayer", "LayerNormLayer", "BatchNormLayer", "NonlinearityLayer"}

    model_out = nn.layers.get_output(model["output"], deterministic=True)

    # batch_idx = T.iscalar('idx')
    # data_shared = OrderedDict({tag: nn.utils.shared_empty(len(arr.shape)+1, arr.dtype)
    #                            for (tag, arr) in cfg.data_tags.items()})
    # givens = OrderedDict()
    # for (tag, l) in model["input"].items():
    inp = model["input"]["video"]
    # givens[inp.input_var] = data_shared["video"][batch_idx*cfg.batch_size : (batch_idx+1)*cfg.batch_size]
    # print model["input"].keys()

    print "Compiling evaluation function"
    iter_eval = theano.function([inp.input_var], [model_out])

    ###############################################################################################################
    # PREDICT
    ###############################################################################################################
    print "Predicting ..."
    import bcolz
    if hasattr(args, "bcolz"):
        bcolz.set_nthreads(args.bcolz)

    chunk_size = 1  # cfg.batch_size * cfg.batches_per_chunk
    cut_off = 8

    preprocessors = cfg.data_loader.preprocessors
    vidprep = None
    classperframe = None
    for p in preprocessors:
        if "VideoLoadPrep" == p.__class__.__name__:
            vidprep = p
        elif "ClassPerFrame" == p.__class__.__name__:
            classperframe = p

    print "Evaluating %s set..." % set_
    stride = cfg.n_frames - cut_off * 2

    import scipy.stats
    import string

    s_preds = []
    pred_dir = "./predictions/"
    if not os.path.exists(pred_dir):
        os.mkdir(pred_dir)
    # pred_file = open(pred_dir+expid+"txt", "w")
    for i, vid_idx in enumerate(set_vid_idxs):
        path = vid_paths[vid_idx]
        print path
        reader = bcolz.open(path, mode="r")
        max_frames = reader.shape[0]
        # max_frames = vid_meta[vid_idx]["max_frames"]
        # n_chunks = int(np.ceil((max_frames-cut_off) / float(stride)))
        s_pred = []
        j = 0
        while True:
            start_frame = j * stride
            end_frame = start_frame + cfg.n_frames
            if j != 0 and end_frame - cut_off >= max_frames:
                break
            j += 1
            fragment, start, end = vidprep.get_fragment(start_frame, reader, push_start=False)
            fraglen = len(fragment)
            if end_frame >= max_frames:
                in_vid = np.zeros((cfg.n_frames,) + cfg.im_shp, "float32")
                in_vid[:fraglen] = fragment
            else:
                in_vid = fragment
            in_vid.shape = (1,) + (cfg.n_frames,) + cfg.im_shp
            iter_result = iter_eval(in_vid)[0]
            if end_frame >= max_frames:
                cut_end = fraglen
            else:
                cut_end = cfg.n_frames - cut_off
            preds = np.argmax(iter_result[cut_off:cut_end], axis=1)
            # print start, end, preds
            s_pred.append(preds)
        del reader

        s_pred = np.hstack(s_pred)
        if len(s_pred) > 0:
            s_pred = np.hstack((np.repeat(s_pred[0], cut_off), s_pred)).astype("int32")
            if len(s_pred) != max_frames:
                last = scipy.stats.mode(s_pred[:-8])[0]
                s_pred = np.hstack((s_pred, np.repeat(last, max_frames - len(s_pred))))
            # print path, max_frames, len(s_pred), s_pred
            # print classperframe.framewise_lbls[vid_idx]
            # print
        else:
            print
            print max_frames, len(s_pred)
            print
        s_preds.append(s_pred)

        # pred_file.write(string.join(path.split("/")[-2:], "/"))
        # begin = 1
        # prev = None
        # for f, p in enumerate(s_pred):
        #     if prev is None: prev = p
        #     elif prev != p:
        #         pred_file.write(" %i,%i:%i" % (begin, f, p+1))
        #         begin = f+1
        #         prev = p
        #     elif f == len(s_pred-1):
        #         pred_file.write(" %i,%i:%i" % (begin, f+1, p + 1))
        # pred_file.write("\n")

        try:
            assert max_frames == len(s_pred)
        except:
            print "\n\t !!! ", max_frames, len(s_pred), "\n"

    cPickle.dump(s_preds, open(pred_dir + expid + ".pkl", "wb"), protocol=cPickle.HIGHEST_PROTOCOL)
    # pred_file.close()
    sys.exit()

    # Unreachable legacy evaluation path below (data_shared is commented out above).
    chunk_gen = cfg.data_loader.chunk_generator(n_chunks="all", chunk_size=chunk_size, set=set_)
    jaccard = []
    preds = []
    for c, chunk in enumerate(chunk_gen):
        for key in data_shared:
            data_shared[key].set_value(chunk[key], borrow=cfg.borrow_shared_data)
        for b in range(cfg.batches_per_chunk):
            iter_result = iter_eval(b)[0]
            print iter_result.shape
            preds.append(iter_result)
        p = np.vstack(preds[-cfg.batches_per_chunk:]).reshape((-1, cfg.n_frames))
        jaccard.append(calc_ji(p, chunk["target"]))
        print c, jaccard[-1]
    print "mean JI =", np.mean(jaccard)