Example No. 1
def get_segmenter_function(params_loc, img_size, NCV=1, version=1,
        param_file_key = None):
    shape = (None, 1, img_size, img_size)
    input_var = T.tensor4('input')
    if NCV > 1:
        expr = 0
        params_files = filter(lambda s: 'fcn_v{}'.format(version) in s, os.listdir(params_loc))
        if param_file_key is not None:
            params_files = filter(lambda s: param_file_key in s, params_files)
        for pfn in params_files:
            net, _, output_det = build_fcn_segmenter(input_var, shape, version)
            u.load_params(net['output'], os.path.join(params_loc, pfn))
            cv = int(pfn.split('_')[-1][1])
            if cv == NCV:
                expr = expr + output_det * NCV
            else:
                expr = expr + output_det
            print 'loaded {}'.format(pfn)
        assert len(params_files) == NCV + 1
        expr = expr / NCV / 2
        print 'loaded {} in ensemble'.format(len(params_files))
    else:
        net, _, output_det = build_fcn_segmenter(input_var, shape, version)
        u.load_params(net['output'], params_loc)
        expr = output_det
        print 'loaded indiv function {}'.format(params_loc)
    return theano.function([input_var], expr)
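A minimal usage sketch for the ensemble branch above; the weights directory, image size, and fold count are assumptions, and `images` stands for any float32 batch shaped (N, 1, img_size, img_size) holding NCV+1 parameter files named the way the function expects:

# Hypothetical call: average the NCV fold models plus the full-data model
# found under weights_dir, then run the compiled Theano function on a batch.
segment = get_segmenter_function('weights_dir', img_size=256, NCV=5, version=1)
probability_maps = segment(images)  # images: float32, shape (N, 1, 256, 256)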
Example No. 2
def get_segmenter_function(params_loc, img_size, ensemble=False, version=2,
        param_file_key = '.npz', weight_full_params=0.33):
    shape = (None, 1, img_size, img_size)
    input_var = T.tensor4('input')
    if ensemble:
        expr = 0
        params_files = filter(lambda s: 'v{}'.format(version) in s, os.listdir(params_loc))
        if param_file_key is not None:
            params_files = filter(lambda s: param_file_key in s, params_files)
        full_params_indices = [i for i,a in enumerate(params_files) if 'f-1' in a]
        if len(full_params_indices) > 0:
            wt_norm = (1. - weight_full_params)/(len(params_files) - len(full_params_indices))
            wt_full = weight_full_params / len(full_params_indices)
            params_weights = [(wt_norm if i not in full_params_indices else wt_full) \
                    for i in xrange(len(params_files))]
        else:
            params_weights = [1./len(params_files)] * len(params_files)
        for pfn,w in zip(params_files, params_weights):
            net, _, output_det = build_fcn_segmenter(input_var, shape, version)
            u.load_params(net['output'], os.path.join(params_loc, pfn))
            expr = expr + w*output_det
            print 'loaded {} wt {}'.format(pfn, w)
        print 'loaded {} in ensemble'.format(len(params_files))
    else:
        net, _, output_det = build_fcn_segmenter(input_var, shape, version)
        u.load_params(net['output'], params_loc)
        expr = output_det
        print 'loaded indiv function {}'.format(params_loc)
    return theano.function([input_var], expr)
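The ensemble weights above give the full-data models ('f-1' in the filename) a combined share of `weight_full_params` and split the remainder evenly across the other files; a worked example with illustrative numbers:

# 6 matching files, 2 of them full-data ('f-1') models, weight_full_params = 0.33:
#   wt_full = 0.33 / 2       = 0.165   (each full-data model)
#   wt_norm = (1 - 0.33) / 4 = 0.1675  (each remaining model)
# Total weight: 2 * 0.165 + 4 * 0.1675 = 1.0, so the averaged output keeps
# the same scale as a single model's output.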
Example No. 3
def encoder_decoder(paramsfile, specstr, channels=3, layersplit='encode', shape=(64,64),
        poolinv=False):
    inp = T.tensor4('inputs')
    w,h=shape
    build_fn = build_cae if poolinv else build_cae_nopoolinv
    network = build_fn(inp, shape=shape,channels=channels,specstr=specstr)
    u.load_params(network, paramsfile)
    laylist = nn.layers.get_all_layers(network)
    enc_layer_idx = next(i for i in xrange(len(laylist)) if laylist[i].name==layersplit)
    enc_layer = laylist[enc_layer_idx]
    return (lambda x: nn.layers.get_output(enc_layer, inputs=x,deterministic=True).eval(),
            lambda x: nn.layers.get_output(network,
                inputs={laylist[0]:np.zeros((x.shape[0],channels,w,h),
                    dtype=theano.config.floatX),
                    enc_layer:x}, deterministic=True).eval().reshape(-1,channels,w,h))
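A hedged usage sketch for the encoder/decoder pair returned above; the parameter file, spec string, and `imgs` batch are assumptions:

# Hypothetical usage: split a trained convolutional autoencoder at the layer
# named 'encode', then round-trip a batch of images through code space.
encode, decode = encoder_decoder('cae.params', specstr, channels=3, shape=(64, 64))
codes = encode(imgs)             # imgs: float32, shape (N, 3, 64, 64)
reconstructions = decode(codes)  # shape (N, 3, 64, 64)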
Example No. 4
def plot_random_GLCMs(glcms, n_per_genre, seed=11):
    """This function prints some random GLCMs from a `glcms` dictionary for each
    genre 

    Arguments:
        glcms {dict} -- a dictionary containing the tuples of glcms
        n_per_genre {int} -- the number of glcms to plot per genre
        seed {int} -- the random seed used for sampling
    """

    R = np.random.RandomState(seed)

    config = load_config()
    params = load_params()
    _, genres, _, _, n_pieces_per_genre, _ = extract_from_config(
        config, params)

    n_genres = len(genres)

    random_tracks = R.randint(0, n_pieces_per_genre, (n_genres, n_per_genre))

    fig = plt.figure(figsize=(int(1.25 * n_genres), int(2.5 * n_genres)))
    subplot = 0
    for g in range(n_genres):
        for t in range(n_per_genre):
            subplot += 1
            map_index = int(g * n_pieces_per_genre + random_tracks[g, t])
            data_to_plot = glcms["train"][map_index]
            ax = fig.add_subplot(n_genres, n_per_genre, subplot)
            ax.set_title(f"-- {data_to_plot[-1]} --")
            plt.imshow(data_to_plot[1], cmap="gray", interpolation="nearest")
            ax.axis("off")
            plt.tight_layout()

    plt.show()
Example No. 5
def main(args):

    config_path = join('{}_logs'.format(args.train_dir), args.config_file)
    config_name = args.config_name
    override_params = args.override_params

    params = utils.load_params(config_path,
                               config_name,
                               override_params=override_params)
    params.train_dir = args.train_dir
    params.data_dir = args.data_dir
    params.num_gpus = args.n_gpus
    params.start_new_model = False

    # Set up environment variables before doing any other global initialization to
    # make sure it uses the appropriate environment variables.
    utils.set_default_param_values_and_env_vars(params)

    # Setup logging & log the version.
    utils.setup_logging(params.logging_verbosity)

    # Log the params values
    pp = pprint.PrettyPrinter(indent=2, compact=True)
    logging.info(pp.pformat(params.values()))
    logging.info("Pytorch version: {}.".format(torch.__version__))
    logging.info("Hostname: {}.".format(socket.gethostname()))

    core_eval = __import__('core.{}'.format(config_name), fromlist=[''])
    evaluate = core_eval.Evaluator(params)
    evaluate.run()
Example No. 6
def main(_):

  if not FLAGS.train_dir or not FLAGS.data_dir:
    raise ValueError("train_dir and data_dir need to be set.")

  config_path = join('{}_logs'.format(FLAGS.train_dir), FLAGS.config_file)
  config_name = FLAGS.config_name
  override_params = FLAGS.override_params

  params = utils.load_params(
    config_path, config_name, override_params=override_params)
  params.train_dir = FLAGS.train_dir
  params.data_dir = FLAGS.data_dir
  params.num_gpus = FLAGS.n_gpus
  params.start_new_model = False

  if FLAGS.backend.lower() in ('tensorflow', 'tf'):
    from neuralnet.tensorflow.eval import Evaluator
  elif FLAGS.backend.lower() in ('pytorch', 'py', 'torch'):
    from neuralnet.pytorch.eval import Evaluator
  else:
    raise ValueError(
      "Backend not recognised. Choose between Tensorflow and Pytorch.")
  evaluate = Evaluator(params)
  evaluate.run()
Example No. 7
def plot_one_map(data_map, map_type, quantized=False):
    """This function plots one map (either a spectrogram or a mel map)

    Arguments:
        data_map {tuple} -- a tuple of the form (file_name, numpy_map, piece_id,
            genre)
        map_type {string} -- one of ('spectrogram', 'mel_map') for the type of
            map to print (affects the y-scale and text)
        quantized {boolean} -- whether the input maps have already been
            quantized or not (will handle the color scale)
    """

    params = load_params()

    frame_length = int(params[map_type]["frame_length_in_s"] *
                       params["sampling_rate"])
    hop_length = int((1 - params[map_type]["overlap"]) * frame_length)

    plt.title(f"""
    ---------------- {map_type.capitalize()} ----------------
    ------- Genre - {data_map[-1]} ------- 
    """)
    y_axis = "log" if map_type == "spectrogram" else "mel"
    librosa.display.specshow(data_map[1],
                             sr=params["sampling_rate"],
                             hop_length=hop_length,
                             x_axis="time",
                             y_axis=y_axis)

    colorbar_format = "%i" if quantized else "%+2.0f dB"

    plt.colorbar(format=colorbar_format)
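A hypothetical call, assuming a `mel_maps` dictionary of `(file_name, numpy_map, piece_id, genre)` tuples keyed by split was built elsewhere in the project:

# Plot the first mel map of the training split.
plot_one_map(mel_maps["train"][0], "mel_map")
plt.show()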
Example No. 8
def create_initial_workteam(kfp_client, experiment_id, region,
                            sagemaker_client, test_file_dir, download_dir):
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    test_params["Arguments"]["team_name"] = workteam_name = (
        utils.generate_random_string(5) + "-" +
        test_params["Arguments"]["team_name"])

    # First create a workteam using a separate pipeline and get the name, arn of the workteam created.
    create_workteamjob(
        kfp_client,
        test_params,
        experiment_id,
        region,
        sagemaker_client,
        download_dir,
    )

    workteam_arn = sagemaker_utils.get_workteam_arn(sagemaker_client,
                                                    workteam_name)

    return workteam_name, workteam_arn
Example No. 9
def test_terminate_trainingjob(kfp_client, experiment_id, region,
                               sagemaker_client):
    test_file_dir = "resources/config/simple-mnist-training"
    download_dir = utils.mkdir(
        os.path.join(test_file_dir + "/generated_test_terminate"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    input_job_name = test_params["Arguments"]["job_name"] = (
        utils.generate_random_string(4) + "-terminate-job")

    run_id, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        60,
        "running",
    )
    print(
        f"Terminating run: {run_id} where Training job_name: {input_job_name}")
    kfp_client_utils.terminate_run(kfp_client, run_id)

    response = sagemaker_utils.describe_training_job(sagemaker_client,
                                                     input_job_name)
    assert response["TrainingJobStatus"] in ["Stopping", "Stopped"]

    utils.remove_dir(download_dir)
Example No. 10
def reload(depot, folder, tag, layer):
    """
    """
    import utils
    
    cwd = os.getcwd()
    path = join(depot, folder)
    os.chdir(path)

    for f in os.listdir('.'):
        if f.endswith("schedule"):
            sched_f = f
    sched_f = open(sched_f)
    sched = cPickle.load(sched_f)
    sched_f.close()

    if layer >= 0:
        ltype = sched['stack'][layer]['type']
        params = utils.load_params(tag + ".params")
        shape = params[layer]['shape']
        model = ltype.__new__(ltype)
        model.__init__(shape=shape, **sched)
        model.reload(params[layer]['params'])
    os.chdir(cwd)

    return model, sched
Example No. 11
def load_model(
        embed_map=None,
        path_to_model=PATH_TO_MODEL,            # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,          # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY,
        path_to_word2vec=PATH_TO_WORD2VEC
        ):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map is None:
        print 'Loading word2vec embeddings...'
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
Example No. 12
def load_checkpoint(model_dir, epoch=None, eval_=False):
    """Load checkpoint from model directory. Checkpoints should be stored in 
    `model_dir/checkpoints/model-epochX.ckpt`, where `X` is the epoch number.
    
    Parameters:
        model_dir (str): path to model directory
        epoch (int): epoch number; set to None for last available epoch
        eval_ (bool): PyTorch evaluation mode. set to True for testing
        
    Returns:
        net (torch.nn.Module): PyTorch checkpoint at `epoch`
        epoch (int): epoch number
    
    """
    if epoch is None:  # get last epoch
        ckpt_dir = os.path.join(model_dir, 'checkpoints')
        epochs = [
            int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"
        ]
        epoch = np.sort(epochs)[-1]
    ckpt_path = os.path.join(model_dir, 'checkpoints',
                             'model-epoch{}.pt'.format(epoch))
    params = utils.load_params(model_dir)
    print('Loading checkpoint: {}'.format(ckpt_path))
    state_dict = torch.load(ckpt_path)
    net = load_architectures(params['arch'], params['fd'])
    net.load_state_dict(state_dict)
    del state_dict
    if eval_:
        net.eval()
    return net, epoch
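A short usage sketch; the model directory is hypothetical and `batch_imgs` stands for any input batch:

# Load the latest checkpoint under ./saved_models/run1 in evaluation mode,
# then run inference without tracking gradients.
net, epoch = load_checkpoint('./saved_models/run1', epoch=None, eval_=True)
with torch.no_grad():
    features = net(batch_imgs.cuda())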
Example No. 13
def main_sim():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch",
                        type=str,
                        help="temp directory in which to save data")

    args = parser.parse_args()
    dir_scratch = args.dir_scratch
    path_params = dir_scratch + "params.json"

    dir_data = dir_scratch + "data/"
    dir_sim = dir_scratch + "sim/"
    dir_train = dir_scratch + "train/"
    dir_spatial = dir_scratch + "spatial/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if rank == 0:
        params = load_params(path_params)
        jobs = make_jobs_sim(params, dir_data, dir_sim, dir_train, dir_spatial)
        jobs = split(jobs, size)
    else:
        jobs = None

    jobs = comm.scatter(jobs, root=0)

    for job in jobs:
        worker_sim(job)
Example No. 14
 def test_load_params(self):
     w1, b1, w2, b2, w3, b3 = load_params()
     self.assertEqual(w1.shape, (256, 1024))
     self.assertEqual(b1.shape, (154, 256))
     self.assertEqual(w2.shape, (256, 256))
     self.assertEqual(b2.shape, (154, 256))
     self.assertEqual(w3.shape, (23, 256))
     self.assertEqual(b3.shape, (154, 23))
Example No. 15
def main_ts():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch",
                        type=str,
                        help="temp directory in which to save data")

    args = parser.parse_args()

    dir_scratch = args.dir_scratch

    path_params = dir_scratch + "params.json"

    dir_data = dir_scratch + "data/"
    dir_ts = dir_scratch + "ts/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Use a quarter of the cores for merging to keep memory within bounds
    size_half = int(size / 4)
    if size_half == 0:
        size_half = 1

    print(rank, "hello")

    # load jobs from json in the root process
    if rank == 0:
        params = load_params(path_params)
        jobs = params['ts']
        jobs_ts = add_paths_to_jobs(jobs, dir_data, dir_ts)
        make_work_data(jobs_ts, dir_data, dir_ts)
        jobs_ts = split(jobs_ts, size)
    else:
        jobs_ts = None

    jobs_ts = comm.scatter(jobs_ts, root=0)

    for j in jobs_ts:
        worker_ts(j)

    print(rank, "barrier")
    comm.Barrier()

    if rank == 0:
        jobs_merge = add_paths_to_jobs(jobs, dir_data, dir_ts)
        jobs_merge = make_jobs_merge(jobs_merge, dir_data, dir_ts)
        jobs_merge = split(jobs_merge, size_half, size)
    else:
        jobs_merge = None

    jobs_merge = comm.scatter(jobs_merge, root=0)

    for j in jobs_merge:
        worker_merge(j)

    print(rank, "barrier")
    comm.Barrier()
Example No. 16
def test_create_endpoint(kfp_client, experiment_id, boto3_session,
                         sagemaker_client, test_file_dir):

    download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    # Generate random prefix for model, endpoint config and endpoint name
    # to avoid errors if resources with same name exists
    test_params["Arguments"]["model_name"] = test_params["Arguments"][
        "endpoint_config_name"] = test_params["Arguments"][
            "endpoint_name"] = input_endpoint_name = (
                utils.generate_random_string(5) + "-" +
                test_params["Arguments"]["model_name"])
    print(f"running test with model/endpoint name: {input_endpoint_name}")

    _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        test_params["Timeout"],
    )

    try:
        outputs = {"sagemaker-deploy-model": ["endpoint_name"]}

        output_files = minio_utils.artifact_download_iterator(
            workflow_json, outputs, download_dir)

        output_endpoint_name = utils.read_from_file_in_tar(
            output_files["sagemaker-deploy-model"]["endpoint_name"],
            "endpoint_name.txt")
        print(f"endpoint name: {output_endpoint_name}")

        # Verify output from pipeline is endpoint name
        assert output_endpoint_name == input_endpoint_name

        # Verify endpoint is running
        assert (sagemaker_utils.describe_endpoint(
            sagemaker_client,
            input_endpoint_name)["EndpointStatus"] == "InService")

        # Validate the model for use by running a prediction
        result = run_predict_mnist(boto3_session, input_endpoint_name,
                                   download_dir)
        print(f"prediction result: {result}")
        assert json.dumps(result, sort_keys=True) == json.dumps(
            test_params["ExpectedPrediction"], sort_keys=True)
        utils.remove_dir(download_dir)
    finally:
        # delete endpoint
        sagemaker_utils.delete_endpoint(sagemaker_client, input_endpoint_name)
Example No. 17
def load_model(
        path_to_model='/home/shunan/Code/skip-thoughts/experiments/amazon/amazon_model_bi.npz',
        path_to_dictionary='/home/shunan/Code/skip-thoughts/experiments/amazon/word_dicts.pkl',
        embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    # if embed_map == None:
    #     print 'Loading word2vec embeddings...'
    #     embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print 'Creating word lookup tables...'
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    # model is just a dict.
    return model
Example No. 18
def test_trainingjob(kfp_client, experiment_id, region, sagemaker_client,
                     test_file_dir):

    download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        test_params["Timeout"],
    )

    outputs = {
        "sagemaker-training-job":
        ["job_name", "model_artifact_url", "training_image"]
    }
    output_files = minio_utils.artifact_download_iterator(
        workflow_json, outputs, download_dir)

    # Verify Training job was successful on SageMaker
    training_job_name = utils.read_from_file_in_tar(
        output_files["sagemaker-training-job"]["job_name"])
    print(f"training job name: {training_job_name}")
    train_response = sagemaker_utils.describe_training_job(
        sagemaker_client, training_job_name)
    assert train_response["TrainingJobStatus"] == "Completed"

    # Verify model artifacts output was generated from this run
    model_artifact_url = utils.read_from_file_in_tar(
        output_files["sagemaker-training-job"]["model_artifact_url"])
    print(f"model_artifact_url: {model_artifact_url}")
    assert model_artifact_url == train_response["ModelArtifacts"][
        "S3ModelArtifacts"]
    assert training_job_name in model_artifact_url

    # Verify training image output is an ECR image
    training_image = utils.read_from_file_in_tar(
        output_files["sagemaker-training-job"]["training_image"])
    print(f"Training image used: {training_image}")
    if "ExpectedTrainingImage" in test_params.keys():
        assert test_params["ExpectedTrainingImage"] == training_image
    else:
        assert f"dkr.ecr.{region}.amazonaws.com" in training_image

    assert not argo_utils.error_in_cw_logs(
        workflow_json["metadata"]["name"]
    ), "Found the CloudWatch error message in the log output. Check SageMaker to see if the job has failed."

    utils.remove_dir(download_dir)
Example No. 19
def train(params=None):
    os.makedirs(params['ckpt_path'], exist_ok=True)

    device = torch.device("cuda")

    train_dataset = HDRDataset(params['dataset'],
                               params=params,
                               suffix=params['dataset_suffix'])
    train_loader = DataLoader(train_dataset,
                              batch_size=params['batch_size'],
                              shuffle=True)

    model = HDRPointwiseNN(params=params)
    ckpt = get_latest_ckpt(params['ckpt_path'])
    if ckpt:
        print('Loading previous state:', ckpt)
        state_dict = torch.load(ckpt)
        state_dict, _ = load_params(state_dict)
        model.load_state_dict(state_dict)
    model.to(device)

    mseloss = torch.nn.MSELoss()
    optimizer = Adam(model.parameters(), params['lr'])

    count = 0
    for e in range(params['epochs']):
        model.train()
        for i, (low, full, target) in enumerate(train_loader):
            optimizer.zero_grad()

            low = low.to(device)
            full = full.to(device)
            t = target.to(device)
            res = model(low, full)

            total_loss = mseloss(res, t)
            total_loss.backward()

            if (count + 1) % params['log_interval'] == 0:
                _psnr = psnr(res, t).item()
                loss = total_loss.item()
                print(e, count, loss, _psnr)

            optimizer.step()
            if (count + 1) % params['ckpt_interval'] == 0:
                print('@@ MIN:', torch.min(res), 'MAX:', torch.max(res))
                model.eval().cpu()
                ckpt_model_filename = "ckpt_" + str(e) + '_' + str(
                    count) + ".pth"
                ckpt_model_path = os.path.join(params['ckpt_path'],
                                               ckpt_model_filename)
                state = save_params(model.state_dict(), params)
                torch.save(state, ckpt_model_path)
                test(ckpt_model_path)
                model.to(device).train()
            count += 1
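`train` reads its configuration from a plain dictionary; a minimal sketch of the keys accessed above (values are illustrative, and `HDRDataset`, `HDRPointwiseNN`, and `save_params` may expect additional keys):

params = {
    'ckpt_path': './checkpoints',   # where ckpt_<epoch>_<step>.pth files are written
    'dataset': './data/hdr',        # passed to HDRDataset (assumed path)
    'dataset_suffix': '',
    'batch_size': 4,
    'lr': 1e-4,                     # Adam learning rate
    'epochs': 100,
    'log_interval': 100,            # steps between loss/PSNR prints
    'ckpt_interval': 1000,          # steps between checkpoint saves + test runs
}
train(params=params)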
Example No. 20
 def save_model(self, netG, avg_param_G, netsD, epoch):
     self.epoch_tracker.write(epoch)
     backup_para = copy_G_params(netG)
     checkpoint = {
         'epoch': epoch,
         'netG': None,
         'netsD_0': None,
         'netsD_1': None,
         'netsD_2': None,
     }
     load_params(netG, avg_param_G)
     checkpoint['netG'] = netG.state_dict()
     load_params(netG, backup_para)
     #
     for i in range(len(netsD)):
         netD = netsD[i]
         checkpoint['netsD_{}'.format(i)] = netD.state_dict()
     torch.save(checkpoint, self.model_file_name.format(epoch))
     print('Save G/Ds models.')
Example No. 21
def test_workteamjob(
    kfp_client, experiment_id, region, sagemaker_client, test_file_dir
):

    download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated"))

    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        )
    )

    # Generate random prefix for workteam_name to avoid errors if resources with same name exists
    test_params["Arguments"]["team_name"] = workteam_name = (
        utils.generate_random_string(5) + "-" + test_params["Arguments"]["team_name"]
    )

    try:
        workflow_json = create_workteamjob(
            kfp_client,
            test_params,
            experiment_id,
            region,
            sagemaker_client,
            download_dir,
        )

        outputs = {"sagemaker-private-workforce": ["workteam_arn"]}

        output_files = minio_utils.artifact_download_iterator(
            workflow_json, outputs, download_dir
        )

        response = sagemaker_utils.describe_workteam(sagemaker_client, workteam_name)

        # Verify WorkTeam was created in SageMaker
        assert response["Workteam"]["CreateDate"] is not None
        assert response["Workteam"]["WorkteamName"] == workteam_name

        # Verify WorkTeam arn artifact was created in Minio and matches the one in SageMaker
        workteam_arn = utils.read_from_file_in_tar(
            output_files["sagemaker-private-workforce"]["workteam_arn"]
        )
        assert response["Workteam"]["WorkteamArn"] == workteam_arn

    finally:
        workteams = sagemaker_utils.list_workteams(sagemaker_client)["Workteams"]
        workteam_names = list(map((lambda x: x["WorkteamName"]), workteams))
        # Check workteam was successfully created
        if workteam_name in workteam_names:
            sagemaker_utils.delete_workteam(sagemaker_client, workteam_name)

    # Delete generated files only if the test is successful
    utils.remove_dir(download_dir)
Example No. 22
def display_random_audio_pieces(pieces,
                                n_genre=2,
                                n_tracks_per_genre=2,
                                n_short_term=2,
                                seed=11):
    """This function takes in a dictionary (`pieces`) of audio pieces and 
    displays the audio of randomly selected pieces in `pieces`
    
    Arguments:
        pieces {dict} -- a dictionary containing audio pieces
        n_genre {int} -- the number of genres to randomly select
        n_tracks_per_genre {int} -- the number of tracks to randomly sample in
            each sampled genre
        n_short_term {int} -- the number of pieces of the tracks sampled to
            randomly sample
        seed {int} -- the random seed used for sampling
    """

    R = np.random.RandomState(seed)

    config = load_config()
    params = load_params()
    train_root, genres, n_train_per_genre, _, _, sampling_rate = extract_from_config(
        config, params)

    random_genres = R.choice(genres, n_genre, replace=False)
    random_tracks = R.choice(n_train_per_genre, (n_genre, n_tracks_per_genre),
                             replace=False)
    random_pieces = R.choice(params["divide"]["number_pieces"],
                             (n_genre, n_tracks_per_genre, n_short_term),
                             replace=False)

    print(f"Genres picked are: {random_genres} \n")

    for g in range(n_genre):
        current_genre = random_genres[g]
        track_list = os.listdir(os.path.join(train_root, current_genre))
        print(f" -- Tracks for genre - {current_genre} -- \n ")
        for t in range(n_tracks_per_genre):
            track = track_list[random_tracks[g, t]]
            print("Full track:")
            print(track)
            IPython.display.display(
                IPython.display.Audio(
                    os.path.join(train_root, current_genre, track)))
            start_index = find_short_term_pieces_for(pieces, track)
            print(start_index)
            print("Random short-term pieces of the track:")
            for index in random_pieces[g, t]:
                print(f"piece {index}")
                IPython.display.display(
                    IPython.display.Audio(pieces["train"][start_index +
                                                          index][1],
                                          rate=sampling_rate))
Example No. 23
def load_model(embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print('Loading dictionary...')
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print('Creating inverted dictionary...')
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print('Loading model options...')
    with open(f'{path_to_model}.pkl', 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print('Loading model parameters...')
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print('Compiling encoder...')
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map is None:
        print('Loading word2vec embeddings...')
        embed_map = load_googlenews_vectors(path_to_word2vec)

    # Lookup table using vocab expansion trick
    print('Creating word lookup tables...')
    table = lookup_table(options, embed_map, worddict, word_idict, f_emb)

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
Example No. 24
def main_train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dir_scratch",
                        type=str,
                        help="temp directory in which to save data")

    args = parser.parse_args()

    dir_scratch = args.dir_scratch

    path_params = dir_scratch + "params.json"

    dir_data = dir_scratch + "data/"
    dir_train = dir_scratch + "train/"

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Use half the cores for merging to keep memory within bounds
    size_half = int(size / 2)
    if size_half == 0:
        size_half = 1

    # load jobs from json in the root process
    if rank == 0:
        params = load_params(path_params)
        jobs = params['models']

        jobs = add_paths_to_jobs(jobs, dir_data, dir_train)
        jobs = add_varsets_to_modeljobs(jobs)
        jobs = add_nsim_to_jobs(jobs, params['nsim'])

        train_start = params['times']['train_start']
        train_end = params['times']['train_end']
        allvars = get_model_vars(jobs)

        print("ALLVARS:")
        for v in allvars:
            print(v)

        make_data_subset(allvars, train_start, train_end, dir_data, dir_train)

        jobs = split(jobs, size)

    else:
        jobs = None

    jobs = comm.scatter(jobs, root=0)

    for job in jobs:
        worker_train(job)
Example No. 25
    def load_params_from_file(self, fname: str):
        """
        Loads and sets model parameters from file.

        :param fname: Path to load parameters from.
        """
        assert self._is_built
        utils.check_condition(
            os.path.exists(fname), "No model parameter file found under %s. "
            "This is either not a model directory or the first training "
            "checkpoint has not happened yet." % fname)
        self.params, _ = utils.load_params(fname)
        logger.info('Loaded params from "%s"', fname)
Example No. 26
 def reload(self, depot, folder, tag):
     """
     reload schedule and parameters from depot/folder/tag.params
     depot, abs path
     """
     from utils import load_params
     from os.path import join
     from gnumpy import as_garray
     file_prefix = join(depot, folder, tag)
     params = load_params(file_prefix + ".params")
     params_stack = params['Stack']['params']
     self.params = as_garray(params_stack)
     for layer, (c1, c2) in izip(self, izip(self.cuts[:-1], self.cuts[1:])):
         layer.p = self.params[c1:c2]
Example No. 27
def main(image_path, results_path, iterations, validation_iterations,
         kernels_per_dim, params_file, l1reg, base_lr, batches,
         checkpoint_path, lr_div, lr_mult, disable_train_pis,
         disable_train_gammas, radial_as, quiet):
    orig = plt.imread(image_path)
    if orig.dtype == np.uint8:
        orig = orig.astype(np.float32) / 255.

    if params_file is not None:
        init_params = load_params(params_file)
    else:
        init_params = None

    if results_path is not None:
        if os.path.exists(results_path):
            shutil.rmtree(results_path)
        os.mkdir(results_path)

    loss_plotter = LossPlotter(path=results_path + "/loss.png", quiet=quiet)
    image_plotter = ImagePlotter(
        path=results_path,
        options=['orig', 'reconstruction', 'gating', 'pis_hist'],
        quiet=quiet)
    logger = ModelLogger(path=results_path)

    smoe = Smoe(orig,
                kernels_per_dim,
                init_params=init_params,
                train_pis=not disable_train_pis,
                train_gammas=not disable_train_gammas,
                radial_as=radial_as,
                start_batches=batches)

    optimizer1 = tf.train.AdamOptimizer(base_lr)
    optimizer2 = tf.train.AdamOptimizer(base_lr / lr_div)
    optimizer3 = tf.train.AdamOptimizer(base_lr * lr_mult)

    # optimizers have to be set before the restore
    smoe.set_optimizer(optimizer1, optimizer2, optimizer3)

    if checkpoint_path is not None:
        smoe.restore(checkpoint_path)

    smoe.train(iterations,
               val_iter=validation_iterations,
               pis_l1=l1reg,
               callbacks=[loss_plotter.plot, image_plotter.plot, logger.log])

    save_model(smoe, results_path + "/params_best.pkl", best=True)
    save_model(smoe, results_path + "/params_last.pkl", best=False)
Example No. 28
def load_model(path_to_model=default_model):
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_model + '.dictionary.pkl', 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open(path_to_model + '.pkl', 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions

    print 'Compiling image encoder...'
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling sentence encoder...'
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['worddict'] = worddict
    model['word_idict'] = word_idict
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    return model
Example No. 29
def get_segmenter_function(params_loc,
                           img_size,
                           ensemble=False,
                           version=2,
                           param_file_key='.npz',
                           weight_full_params=0.33):
    shape = (None, 1, img_size, img_size)
    input_var = T.tensor4('input')
    if ensemble:
        expr = 0
        params_files = filter(lambda s: 'v{}'.format(version) in s,
                              os.listdir(params_loc))
        if param_file_key is not None:
            params_files = filter(lambda s: param_file_key in s, params_files)
        full_params_indices = [
            i for i, a in enumerate(params_files) if 'f-1' in a
        ]
        if len(full_params_indices) > 0:
            wt_norm = (1. - weight_full_params) / (len(params_files) -
                                                   len(full_params_indices))
            wt_full = weight_full_params / len(full_params_indices)
            params_weights = [(wt_norm if i not in full_params_indices else wt_full) \
                    for i in xrange(len(params_files))]
        else:
            params_weights = [1. / len(params_files)] * len(params_files)
        for pfn, w in zip(params_files, params_weights):
            net, _, output_det = build_fcn_segmenter(input_var, shape, version)
            u.load_params(net['output'], os.path.join(params_loc, pfn))
            expr = expr + w * output_det
            print 'loaded {} wt {}'.format(pfn, w)
        print 'loaded {} in ensemble'.format(len(params_files))
    else:
        net, _, output_det = build_fcn_segmenter(input_var, shape, version)
        u.load_params(net['output'], params_loc)
        expr = output_det
        print 'loaded indiv function {}'.format(params_loc)
    return theano.function([input_var], expr)
Example No. 30
def load_model(path_to_model=default_model):
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print 'Loading dictionary...'
    with open('%s.dictionary.pkl'%path_to_model, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl'%path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['worddict'] = worddict
    model['word_idict'] = word_idict
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    return model
Example No. 31
def load_model():
    """
    Load all model components
    """
    print path_to_model

    # Load the worddict
    print "Loading dictionary..."
    with open("%s.dictionary.pkl" % path_to_model, "rb") as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print "Creating inverted dictionary..."
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = "<eos>"
    word_idict[1] = "UNK"

    # Load model options
    print "Loading model options..."
    with open("%s.pkl" % path_to_model, "rb") as f:
        options = pkl.load(f)

    # Load parameters
    print "Loading model parameters..."
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print "Compiling sentence encoder..."
    trng = RandomStreams(1234)
    trng, [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name="f_senc")

    print "Compiling image encoder..."
    trng, [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name="f_ienc")

    # Store everything we need in a dictionary
    print "Packing up..."
    model = {}
    model["options"] = options
    model["worddict"] = worddict
    model["word_idict"] = word_idict
    model["f_senc"] = f_senc
    model["f_ienc"] = f_ienc
    return model
Example No. 32
def get_best_model(return_weights=False):
    params = utils.load_params()
    model = MyAlexNet(params).to(device=params.device)
    checkpoint = os.path.join(config.model_dir, 'last.pth.tar')
    utils.load_checkpoint(checkpoint, model, params)
    if return_weights:
        weights = {
            "conv1":model.conv1[0].weight.data,
            "conv2":model.conv2[0].weight.data,
            "conv3":model.conv3[0].weight.data,
            "conv4":model.conv4[0].weight.data,
            "conv5":model.conv5[0].weight.data,
        }
        return weights
    return model, params
Example No. 33
 def load_params(self, name="", dir_path="", epoch=None):
     params, aux_states, param_loading_path = load_params(dir_path=dir_path,
                                                          epoch=epoch,
                                                          name=name)
     logging.info('Loading params from \"%s\" to %s' %
                  (param_loading_path, self.name))
     for k, v in params.items():
         if k in self.params:
             logging.debug('   Loading %s %s' % (k, str(v.shape)))
             self.params[k][:] = v
         else:
             logging.warn("Found unused param in the saved model file: %s" %
                          k)
     for k, v in aux_states.items():
         self.aux_states[k][:] = v
Example No. 34
def run_mlstm1900_multiple_sequences():
    sequences = [
        "MKLVTITJ",
        "MKLVDIAJ",
        "MKLVTIAJ",
        "MKLRKIAJ",
        "MKLVTIMJ",
    ]

    params = load_params()
    x = get_embeddings(sequences)
    h_final, c_final, out = mlstm1900(params, x)

    print(out.shape)
    print(h_final.shape)
    print(c_final.shape)
Example No. 35
def gen_testloss(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [
        int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"
    ]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = [
        "epoch", "step", "loss", "discrimn_loss_e", "compress_loss_e",
        "discrimn_loss_t", "compress_loss_t"
    ]
    csv_path = utils.create_csv(args.model_dir, 'losses_test.csv', headers)
    print('writing to:', csv_path)

    # load data
    test_transforms = tf.load_transforms('test')
    testset = tf.load_trainset(params['data'], test_transforms, train=False)
    testloader = DataLoader(testset,
                            batch_size=params['bs'],
                            shuffle=False,
                            num_workers=4)

    # save loss
    criterion = MaximalCodingRateReduction(gam1=params['gam1'],
                                           gam2=params['gam2'],
                                           eps=params['eps'])
    for epoch, ckpt_path in enumerate(ckpt_paths):
        net, epoch = tf.load_checkpoint(args.model_dir,
                                        epoch=epoch,
                                        eval_=True)
        for step, (batch_imgs, batch_lbls) in enumerate(testloader):
            features = net(batch_imgs.cuda())
            loss, loss_empi, loss_theo = criterion(features,
                                                   batch_lbls,
                                                   num_classes=len(
                                                       testset.classes))
            utils.save_state(args.model_dir,
                             epoch,
                             step,
                             loss.item(),
                             *loss_empi,
                             *loss_theo,
                             filename='losses_test.csv')
    print("Finished generating test loss.")
Example No. 36
def load_model(
        path_to_model=PATH_TO_MODEL,            # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,          # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY
        ):
    """
    Load a trained model for decoding
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl'%path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Sampler.
    trng = RandomStreams(1234)
    f_init, f_next = build_sampler(tparams, options, trng)

    # Pack everything up
    dec = dict()
    dec['options'] = options
    dec['trng'] = trng
    dec['worddict'] = worddict
    dec['word_idict'] = word_idict
    dec['tparams'] = tparams
    dec['f_init'] = f_init
    dec['f_next'] = f_next
    return dec
Example No. 37
def test_trainingjob(kfp_client, experiment_id, sagemaker_client,
                     test_file_dir):

    download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated"))
    test_params = utils.load_params(
        utils.replace_placeholders(
            os.path.join(test_file_dir, "config.yaml"),
            os.path.join(download_dir, "config.yaml"),
        ))

    test_params["Arguments"]["hyperparameters"] = json.dumps(
        test_params["Arguments"]["hyperparameters"])
    test_params["Arguments"]["channels"] = json.dumps(
        test_params["Arguments"]["channels"])
    run_id, status, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
        kfp_client,
        experiment_id,
        test_params["PipelineDefinition"],
        test_params["Arguments"],
        download_dir,
        test_params["TestName"],
        test_params["Timeout"],
    )

    outputs = {"sagemaker-training-job": ["job_name", "model_artifact_url"]}
    output_files = minio_utils.artifact_download_iterator(
        workflow_json, outputs, download_dir)

    # Verify Training job was successful on SageMaker
    training_job_name = utils.extract_information(
        output_files["sagemaker-training-job"]["job_name"], "job_name.txt")
    print(f"training job name: {training_job_name}")
    train_response = sagemaker_utils.describe_training_job(
        sagemaker_client, training_job_name.decode())
    assert train_response["TrainingJobStatus"] == "Completed"

    # Verify model artifacts output was generated from this run
    model_artifact_url = utils.extract_information(
        output_files["sagemaker-training-job"]["model_artifact_url"],
        "model_artifact_url.txt",
    )
    print(f"model_artifact_url: {model_artifact_url}")
    assert (model_artifact_url.decode() == train_response["ModelArtifacts"]
            ["S3ModelArtifacts"])
    assert (train_response["ModelArtifacts"]["S3ModelArtifacts"]
            in model_artifact_url.decode())
Example No. 38
def gen_accuracy(args):
    # load data and model
    params = utils.load_params(args.model_dir)
    ckpt_dir = os.path.join(args.model_dir, 'checkpoints')
    ckpt_paths = [
        int(e[11:-3]) for e in os.listdir(ckpt_dir) if e[-3:] == ".pt"
    ]
    ckpt_paths = np.sort(ckpt_paths)

    # csv
    headers = ["epoch", "acc_train", "acc_test"]
    csv_path = utils.create_csv(args.model_dir, 'accuracy.csv', headers)

    for epoch, ckpt_path in enumerate(ckpt_paths):
        if epoch % 5 != 0:
            continue
        net, epoch = tf.load_checkpoint(args.model_dir,
                                        epoch=epoch,
                                        eval_=True)
        # load data
        train_transforms = tf.load_transforms('test')
        trainset = tf.load_trainset(params['data'],
                                    train_transforms,
                                    train=True)
        trainloader = DataLoader(trainset, batch_size=500, num_workers=4)
        train_features, train_labels = tf.get_features(net,
                                                       trainloader,
                                                       verbose=False)

        test_transforms = tf.load_transforms('test')
        testset = tf.load_trainset(params['data'],
                                   test_transforms,
                                   train=False)
        testloader = DataLoader(testset, batch_size=500, num_workers=4)
        test_features, test_labels = tf.get_features(net,
                                                     testloader,
                                                     verbose=False)

        acc_train, acc_test = svm(args, train_features, train_labels,
                                  test_features, test_labels)
        utils.save_state(args.model_dir,
                         epoch,
                         acc_train,
                         acc_test,
                         filename='accuracy.csv')
    print("Finished generating accuracy.")
Example No. 39
def load_model(
        path_to_model=PATH_TO_MODEL,  # model opts (.pkl)
        path_to_params=PATH_TO_PARAMS,  # model params (.npz)
        path_to_dictionary=PATH_TO_DICTIONARY):
    """
    Load a trained model for decoding
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_params, params)
    tparams = init_tparams(params)

    # Sampler.
    trng = RandomStreams(1234)
    f_init, f_next = build_sampler(tparams, options, trng)

    # Pack everything up
    dec = dict()
    dec['options'] = options
    dec['trng'] = trng
    dec['worddict'] = worddict
    dec['word_idict'] = word_idict
    dec['tparams'] = tparams
    dec['f_init'] = f_init
    dec['f_next'] = f_next
    return dec
Example No. 40
def load_model(path_to_model):
    """
    Load all model components
    """
    print path_to_model

    # Load model
    print 'Loading model'
    with open(path_to_model + '.pkl', 'rb') as f:
        model = pkl.load(f)

    options = model['options']

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model + '.npz', params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling sentence encoder...'
    [x, x_mask], sentences = build_sentence_encoder(tparams, options)
    f_senc = theano.function([x, x_mask], sentences, name='f_senc')

    print 'Compiling image encoder...'
    [im], images = build_image_encoder(tparams, options)
    f_ienc = theano.function([im], images, name='f_ienc')

    print 'Compiling error computation...'
    [s, im], errs = build_errors(options)
    f_err = theano.function([s,im], errs, name='f_err')

    # Store everything we need in a dictionary
    print 'Packing up...'
    model['f_senc'] = f_senc
    model['f_ienc'] = f_ienc
    model['f_err'] = f_err
    return model
Example No. 41
def deconvoluter(params_fn, specstr, shape):
    input_var = T.tensor4('input')
    decnet = build_deconv_net(input_var, shape=shape, specstr=specstr)
    u.load_params(decnet, params_fn)
    return theano.function([input_var], nn.layers.get_output(decnet))
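A hypothetical call; the parameter file, spec string, and input layout are assumptions that must match whatever `build_deconv_net` was trained with:

# Compile the deconvolution network once, then apply it to a batch of inputs.
deconv = deconvoluter('deconv.npz', specstr, shape=(None, 1, 64, 64))
outputs = deconv(inputs)  # inputs: float32 batch matching the network's input shape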
Example No. 42
def train(dim_word=100,  # word vector dimensionality
          dim=1000,  # the number of LSTM units
          encoder='gru',
          decoder='gru_cond',
          n_words_src=30000,
          n_words=30000,
          patience=10,  # early stopping patience
          max_epochs=5000,
          finish_after=10000000,  # finish after this many updates
          dispFreq=100,
          decay_c=0.,  # L2 regularization penalty
          alpha_c=0.,  # alignment regularization
          clip_c=-1.,  # gradient clipping threshold
          lrate=1.,  # learning rate
          maxlen=100,  # maximum length of the description
          optimizer='rmsprop',
          batch_size=16,
          saveto='model.npz',
          saveFreq=1000,  # save the parameters after every saveFreq updates
          datasets=[
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok',
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'],
          picked_train_idxes_file=r'',
          use_dropout=False,
          reload_=False,
          overwrite=False,
          preload='',
          sort_by_len=False,
          convert_embedding=True,
          dump_before_train=False,
    ):
    # Model options
    model_options = locals().copy()
    if reload_:
        lrate *= 0.5

    # load dictionaries and invert them

    # reload options
    if reload_ and os.path.exists(preload):
        print 'Reloading model options'
        with open(r'.\model\en2fr.iter160000.npz.pkl', 'rb') as f:
            model_options = pkl.load(f)

    print 'Configuration from fy'

    vocab_en_filename = './data/dic/en2fr_en_vocabs_top1M.pkl'
    vocab_fr_filename = './data/dic/en2fr_fr_vocabs_top1M.pkl'
    map_filename = './data/dic/mapFullVocab2Top1MVocab.pkl'
    lr_discount_freq = 80000

    print 'Done'

    print 'Loading data'

    text_iterator = TextIterator(
        datasets[0],
        datasets[1],
        vocab_en_filename,
        vocab_fr_filename,
        batch_size,
        maxlen,
        n_words_src,
        n_words,
    )

    # sys.stdout.flush()
    # train_data_x = pkl.load(open(datasets[0], 'rb'))
    # train_data_y = pkl.load(open(datasets[1], 'rb'))
    #
    # if len(picked_train_idxes_file) != 0:
    #     picked_idxes = pkl.load(open(picked_train_idxes_file, 'rb'))
    #     train_data_x = [train_data_x[id] for id in picked_idxes]
    #     train_data_y = [train_data_y[id] for id in picked_idxes]
    #
    # print 'Total train:', len(train_data_x)
    # print 'Max len:', max([len(x) for x in train_data_x])
    # sys.stdout.flush()
    #
    # if sort_by_len:
    #     slen = np.array([len(s) for s in train_data_x])
    #     sidx = slen.argsort()
    #
    #     _sbuf = [train_data_x[i] for i in sidx]
    #     _tbuf = [train_data_y[i] for i in sidx]
    #
    #     train_data_x = _sbuf
    #     train_data_y = _tbuf
    #     print len(train_data_x[0]), len(train_data_x[-1])
    #     sys.stdout.flush()
    #     train_batch_idx = get_minibatches_idx(len(train_data_x), batch_size, shuffle=False)
    # else:
    #     train_batch_idx = get_minibatches_idx(len(train_data_x), batch_size, shuffle=True)

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(preload):
        print 'Reloading model parameters'
        params = load_params(preload, params)

        # for k, v in params.iteritems():
        #     print '>', k, v.shape, v.dtype

        # Only convert parameters when reloading
        if convert_embedding:
            # =================
            # Convert the input and output embedding parameters using an existing word embedding
            # =================
            print 'Convert input and output embedding'

            temp_Wemb = params['Wemb']
            orig_emb_mean = np.mean(temp_Wemb, axis=0)

            params['Wemb'] = np.tile(orig_emb_mean, [params['Wemb'].shape[0], 1])

            # Load vocabulary map dicts and do mapping
            with open(map_filename, 'rb') as map_file:
                map_en = pkl.load(map_file)
                map_fr = pkl.load(map_file)

            for full, top in map_en.iteritems():
                emb_size = temp_Wemb.shape[0]
                if full < emb_size and top < emb_size:
                    params['Wemb'][top] = temp_Wemb[full]

            print 'Convert input embedding done'

            temp_ff_logit_W = params['ff_logit_W']
            temp_Wemb_dec = params['Wemb_dec']
            temp_b = params['ff_logit_b']

            orig_ff_logit_W_mean = np.mean(temp_ff_logit_W, axis=1)
            orig_Wemb_dec_mean = np.mean(temp_Wemb_dec, axis=0)
            orig_b_mean = np.mean(temp_b)

            params['ff_logit_W'] = np.tile(orig_ff_logit_W_mean, [params['ff_logit_W'].shape[1], 1]).T
            params['ff_logit_b'].fill(orig_b_mean)
            params['Wemb_dec'] = np.tile(orig_Wemb_dec_mean, [params['Wemb_dec'].shape[0], 1])

            for full, top in map_fr.iteritems():
                emb_size = temp_Wemb_dec.shape[0]
                if full < emb_size and top < emb_size:
                    params['ff_logit_W'][:, top] = temp_ff_logit_W[:, full]
                    params['ff_logit_b'][top] = temp_b[full]
                    params['Wemb_dec'][top] = temp_Wemb_dec[full]

            print 'Convert output embedding done'

            # for k, v in params.iteritems():
            #     print '>', k, v.shape, v.dtype

            # ================
            # End Convert
            # ================

    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, x_emb = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng, use_noise)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=profile)
    f_x_emb = theano.function([x, x_mask], x_emb, profile=profile)
    print 'Done'
    sys.stdout.flush()
    cost = cost.mean()

    # apply L2 regularization on weights
    if decay_c > 0.:
        decay_c = theano.shared(np.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # regularize the alpha weights
    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(np.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * (
            (tensor.cast(y_mask.sum(0) // x_mask.sum(0), 'float32')[:, None] -
             opt_ret['dec_alphas'].sum(0)) ** 2).sum(1).mean()
        cost += alpha_reg

    # after all regularizers - compile the computational graph for cost
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=profile)
    print 'Done'

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'
    sys.stdout.flush()
    # apply gradient clipping here
    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g ** 2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (clip_c ** 2),
                                           g / tensor.sqrt(g2) * clip_c,
                                           g))
        grads = new_grads

    # compile the optimizer, the actual computational graph is compiled here
    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    print 'Optimization'

    best_p = None
    bad_counter = 0
    uidx = 0
    if reload_:
        m = re.search(r'.+iter(\d+?)\.npz', preload)
        if m:
            uidx = int(m.group(1))
    print 'uidx', uidx, 'l_rate', lrate

    estop = False
    history_errs = []
    # reload history

    if dump_before_train:
        print 'Dumping before train...',
        saveto_uidx = '{}.iter{}.npz'.format(
            os.path.splitext(saveto)[0], uidx)
        np.savez(saveto_uidx, history_errs=history_errs,
                 uidx=uidx, **unzip(tparams))
        print 'Done'

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    for eidx in xrange(max_epochs):
        n_samples = 0

        # for i, batch_idx in train_batch_idx:
        #
        #     x = [train_data_x[id] for id in batch_idx]
        #     y = [train_data_y[id] for id in batch_idx]

        for i, (x, y) in enumerate(text_iterator):
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            x, x_mask, y, y_mask = prepare_data(x, y)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()

            # compute cost, grads and copy grads to shared variables
            cost = f_grad_shared(x, x_mask, y, y_mask)

            # do the update on parameters
            f_update(lrate)

            ud = time.time() - ud_start

            # check for bad numbers, usually we remove non-finite elements
            # and continue training - but not done here
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            # discount the learning rate
            if lr_discount_freq > 0 and np.mod(uidx, lr_discount_freq) == 0:
                lrate *= 0.5
                print 'Discount learning rate to {} at iteration {}'.format(lrate, uidx)

            # verbose
            if np.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                sys.stdout.flush()

            if np.mod(uidx, saveFreq) == 0:
                # save with uidx
                if not overwrite:
                    # print 'Saving the model at iteration {}...'.format(uidx),
                    saveto_uidx = '{}.iter{}.npz'.format(
                            os.path.splitext(saveto)[0], uidx)
                    np.savez(saveto_uidx, history_errs=history_errs,
                             uidx=uidx, **unzip(tparams))
                    # print 'Done'
                    # sys.stdout.flush()
            # generate some samples with the model and display them

            # finish after this many updates
            if uidx >= finish_after:
                print 'Finishing after %d iterations!' % uidx
                estop = True
                break

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)

    return 0.
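The embedding conversion above boils down to a small remapping recipe: initialize every row of the new table to the mean of the old embedding, then copy over the rows whose full-vocabulary index maps into the reduced vocabulary. A minimal numpy sketch of that idea (the helper name and the toy map are illustrative, not part of the code above):

import numpy as np

def remap_embedding(old_emb, full_to_top, new_rows):
    # fill the new table with the mean of the old embedding, then copy the
    # rows whose full-vocabulary index is mapped into the reduced vocabulary
    new_emb = np.tile(old_emb.mean(axis=0), (new_rows, 1))
    for full, top in full_to_top.items():
        if full < old_emb.shape[0] and top < new_rows:
            new_emb[top] = old_emb[full]
    return new_emb

old = np.arange(12, dtype='float32').reshape(4, 3)   # 4 old words, dim 3
new = remap_embedding(old, {0: 1, 3: 0}, new_rows=2)
print new   # row 0 <- old word 3, row 1 <- old word 0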
Ejemplo n.º 43
0
    def record(self):
        # assumption: the original (partial) snippet creates the voltmeter recorder before this point
        voltmeter = nest.Create('voltmeter')
        nest.SetStatus(voltmeter, [{"to_file": True, "withtime": True, 'label' : self.params['exc_volt_fn']}])
        nest.DivergentConnect(voltmeter, self.nrns)


    def run(self):
        # R U N 
        nest.Simulate(self.params['t_sim'])


if __name__ == '__main__':

        
    if len(sys.argv) == 2:
#        params = utils.load_params(os.path.abspath(sys.argv[1]))
        # load existing parameters
        params_json = utils.load_params(os.path.abspath(sys.argv[1]))
        params = utils.convert_to_NEST_conform_dict(params_json) 
        # this is necessary because json stores information in unicode, but SLI (the NEST interpreter) does not understand unicode ...

    else:
        GP = simulation_parameters.global_parameters()
        params = GP.params
        GP.write_parameters_to_file() # write_parameters_to_file MUST be called before every simulation

    sim = Simulation(params) 
    sim.setup()
    sim.create_cells()
    sim.create_input_spiketrains()
    sim.record()
    sim.run()
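The comment above notes that JSON hands back unicode strings, which SLI (the NEST interpreter) cannot digest. utils.convert_to_NEST_conform_dict is not shown in this excerpt; a plausible minimal sketch of that kind of conversion, with purely illustrative names, is:

import json

def to_plain_str(obj):
    # recursively turn the unicode keys/values returned by json.load into byte strings
    if isinstance(obj, dict):
        return dict((to_plain_str(k), to_plain_str(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return [to_plain_str(x) for x in obj]
    if isinstance(obj, unicode):
        return obj.encode('utf-8')
    return obj

params = to_plain_str(json.loads('{"t_sim": 1000.0, "exc_volt_fn": "volt.dat"}'))
print type(params.keys()[0])   # <type 'str'> instead of <type 'unicode'>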
Ejemplo n.º 44
0
    max_epoch = (0 if CV else num_epochs)
    for i in xrange(ntimes):
        if not CV and i != 5:
            continue
        if i == 5:
            num_epochs = max_epoch + 1
            print("full train data use {:d} epochs".format(num_epochs))
        nn.layers.set_all_param_values(net['output'], init0)
        data = u.DataH5PyStreamer(os.path.join(c.data_sunnybrook, datafile), batch_size=batch_size, folds=(5,i))
        hist,best_epoch = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn = train_fn, test_fn=test_fn,
                            max_per_epoch=-1,
                            tr_transform=lambda x: du.segmenter_data_transform(x, shift=shi, rotate=rot, scale = sca, normalize_pctwise=pct_norm_tr),
                            te_transform=lambda x: du.segmenter_data_transform(x, normalize_pctwise=pct_norm,istest=True),
                            last_layer = net['output'],
                            save_best_params_to=c.params_dir + '/fcn_v{}_{}_f{}.npz'.format(version, vvv,i))
        if i < 5 and best_epoch > max_epoch:
            max_epoch = best_epoch
    if CV:
        for pfn in ['fcn_v{}_{}_f{}.npz'.format(version, vvv, i) for i in xrange(ntimes-1)]:
            u.load_params(net['output'], os.path.join(c.params_dir, pfn))
            testfold = int(pfn.split('_')[-1][1])
            data = u.DataH5PyStreamer(os.path.join(c.data_sunnybrook, datafile),
                                      batch_size=16, folds=(5,testfold))
            streamer = data.streamer(training=False, shuffled=True)

            accs = []
            for imb in streamer.get_epoch_iterator():
                x,y = du.segmenter_data_transform(imb,normalize_pctwise=pct_norm)
                accs.append(acc_fn(x,y))
            print pfn, np.mean(accs)
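The folds=(5, i) argument above appears to hold out fold i of 5 for testing while training on the rest; that exact semantics of DataH5PyStreamer is an assumption here. A self-contained numpy sketch of such a split:

import numpy as np

def kfold_indices(n, k, test_fold):
    # contiguous, deterministic folds; fold `test_fold` is held out for testing
    folds = np.array_split(np.arange(n), k)
    test_idx = folds[test_fold]
    train_idx = np.concatenate([f for j, f in enumerate(folds) if j != test_fold])
    return train_idx, test_idx

tr, te = kfold_indices(100, 5, test_fold=2)
print len(tr), len(te)   # 80 20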
Ejemplo n.º 45
0
def main(model_options):
  
  print 'Loading data'
  dp = data_provider()
  dp.load_data(model_options['batch_size'], model_options['word_count_threshold'])
  dp.build_word_vocab()
  dp.group_train_captions_by_length()
  model_options['vocab_size'] = dp.get_word_vocab_size()

  print 'Building model'  
  # This create the initial parameters as numpy ndarrays.
  generator = caption_generator()
  params = generator.init_params(model_options)
  save_n = {}
  save_n['checkpoint'] = 0
  save_n['prediction'] = 0
  
  # reload a saved checkpoint
  if model_options['reload_checkpoint_path']:
    _, save_n['checkpoint'] = utils.load_params(model_options['reload_checkpoint_path'], params)
    print 'Reloaded checkpoint from', model_options['reload_checkpoint_path']
  
  # This create Theano Shared Variable from the parameters.
  # Dict name (string) -> Theano Tensor Shared Variable
  # params and tparams have different copy of the weights.
  tparams = utils.init_tparams(params)
  
  # use_noise is for dropout
  sents, mask, imgs, gt_sents, use_noise, cost = generator.build_model(tparams)
  grads = tensor.grad(cost, wrt=tparams.values())
  
  lr = tensor.scalar(name='lr')
  f_grad_shared, f_update = optimizers[model_options['optimizer']](lr, tparams, grads, sents, mask, imgs, gt_sents, cost)
  
  imgs_to_predict, predicted_indices, predicted_prob = generator.predict(tparams)
  f_pred = theano.function([imgs_to_predict], predicted_indices, name='f_pred')
  f_pred_prob = theano.function([imgs_to_predict], predicted_prob, name='f_pred_prob')
    
  train_iter = dp.train_iterator
  kf_valid = KFold(len(dp.split['val']), n_folds=len(dp.split['val']) / model_options['batch_size'], shuffle=False)
  
  if model_options['use_dropout'] == 1:
    use_noise.set_value(1.)
  else:
    use_noise.set_value(0.)
     
  print 'Optimization'
  
  uidx = 0
  lrate = model_options['lrate']
  # timer for the periodic progress display
  dp_start = time.time()
  for eidx in xrange(model_options['max_epochs']):
    print 'Epoch ', eidx
    
    for batch_data in train_iter:
      uidx += 1
      
      # preparing the mini batch data
      pd_start = time.time()
      sents, sents_mask, imgs, gt_sents = dp.prepare_train_batch_data(batch_data)
      pd_duration = time.time() - pd_start
      
      if sents is None:
        print 'Minibatch is empty'
        continue
      
      # training on the mini batch
      ud_start = time.time()
      cost = f_grad_shared(sents, sents_mask, imgs, gt_sents)
      f_update(lrate)
      ud_duration = time.time() - ud_start
      
      # Numerical stability check
      if numpy.isnan(cost) or numpy.isinf(cost):
        print 'NaN detected'
      
      if numpy.mod(uidx, model_options['disp_freq']) == 0:
        dp_duration = time.time() - dp_start
        print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'Prepare data ', pd_duration, 'Update data ', ud_duration, '{0}_iter_time {1}'.format(model_options['disp_freq'], dp_duration)
        dp_start = time.time()

      # Log validation loss + checkpoint the model with the best validation log likelihood
      if numpy.mod(uidx, model_options['valid_freq']) == 0:
        scores = validate_and_save_checkpoint(model_options, dp, params, tparams, f_pred, f_pred_prob, kf_valid, save_n)
        print scores
  
  print 'Performing final validation'
  scores = validate_and_save_checkpoint(model_options, dp, params, tparams, f_pred, f_pred_prob, kf_valid, save_n)
  print scores
  print 'Done!!!'
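prepare_train_batch_data is not shown, but mask-based batching like the one used above usually amounts to padding variable-length id sequences into a fixed matrix plus a 0/1 mask. A hedged sketch, assuming the common time-major (maxlen, batch) layout of these Theano examples:

import numpy as np

def pad_batch(seqs, pad_value=0):
    # seqs: list of lists of word ids -> (maxlen, batch) id matrix and float mask
    maxlen = max(len(s) for s in seqs)
    x = np.full((maxlen, len(seqs)), pad_value, dtype='int64')
    mask = np.zeros((maxlen, len(seqs)), dtype='float32')
    for i, s in enumerate(seqs):
        x[:len(s), i] = s
        mask[:len(s), i] = 1.
    return x, mask

x, m = pad_batch([[4, 8, 15], [16, 23]])
print x.shape, m.sum(axis=0)   # (3, 2) [ 3.  2.]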
Ejemplo n.º 46
0
def train(dim_word_desc=400,  # word vector dimensionality
          dim_word_q=400,
          dim_word_ans=600,
          dim_proj=300,
          dim=400,  # the number of LSTM units
          encoder_desc='lstm',
          encoder_desc_word='lstm',
          encoder_desc_sent='lstm',
          use_dq_sims=False,
          eyem=None,
          learn_h0=False,
          use_desc_skip_c_g=False,
          debug=False,
          encoder_q='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_q=49145,
          n_words_desc=115425,
          n_words_ans=409,
          pkl_train_files=None,
          pkl_valid_files=None,
          maxlen=2000, # maximum length of the description
          optimizer='rmsprop',
          batch_size=2,
          vocab=None,
          valid_batch_size=16,
          use_elu_g=False,
          saveto='model.npz',
          model_dir=None,
          ms_nlayers=3,
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          datasets=[None],
          truncate=400,
          momentum=0.9,
          use_bidir=False,
          cost_mask=None,
          valid_datasets=['/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
                          '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'],
          dropout_rate=0.5,
          use_dropout=True,
          reload_=True,
          **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open('%s.pkl' % mpath_best, 'rb') as f:
                model_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open('%s.pkl' % mpath, 'rb') as f:
                model_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Couldn't reload the models initializing from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
                     opt_ret, \
                     cost, errors, ent_errors, ent_derrors, probs = \
                        build_model(tparams,
                                    model_options,
                                    prepare_data if not opt_ds['use_sent_reps'] \
                                            else prepare_data_sents,
                                    valid,
                                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'],
                inps_d['slen'], inps_d['qlen'],\
                inps_d['ent_mask']
                ]
    else:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'], \
                inps_d['qlen'], \
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]

    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # Apply weight decay on the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.

        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv ** 2).sum()

        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # Gradient clipping:
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c**2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c,
                                     g)
    inps.pop()  # drop the last input (ent_mask); it is not fed to the optimizer functions
    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    learning_rule = None  # NOTE: overrides the rule chosen above, so the eval(optimizer) branch below is always taken

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr,
                                                  tparams,
                                                  grads,
                                                  inps,
                                                  cost,
                                                  errors)

    print 'Done'
    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
            train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0

        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # To mask the description and the question.
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(d_,
                                                                            q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d,
                                                           d_mask,
                                                           q,
                                                           q_mask,
                                                           a,
                                                           dlen,
                                                           slen,
                                                           qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask,
                                                           q, q_mask,
                                                           a,
                                                           dlen,
                                                           qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # Collect the running ave train stats.
            train_cost_ave = running_ave(train_cost_ave,
                                         cost)
            train_err_ave = running_ave(train_err_ave,
                                        errors)
            train_gnorm_ave = running_ave(train_gnorm_ave,
                                          gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb; ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                        ' Cost ', cost, ' UD ', ud, \
                        ' UpNorm ', upnorm[0].tolist(), \
                        ' GNorm ', gnorm, \
                        ' Pnorm ', pnorm, 'Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best, history_errs=history_errs, **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)

                numpy.savez(mpath, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))
                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))

                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                        valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                                  prepare_data if not opt_ds['use_sent_reps'] \
                                                    else prepare_data_sents,
                                                  model_options,
                                                  valid,
                                                  use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate([va.argmax(0) for va  in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()

                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)

                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)

                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)

                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                    best_found = True
                else:
                    best_found = False

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()


                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                        ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                        ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                        ' Valid alphas std: ', std_valid_alphas, \
                        ' Valid alpha negent: ', valid_alpha_ent, \
                        ' Valid error ent: ', error_ent, \
                        ' Valid error desc ent: ', error_dent

                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                        ' Train cost: ', train_cost_ave, \
                        ' Train grad norm: ', train_gnorm_ave
                print "============================"


                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()


        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
            valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                      prepare_data if not opt_ds['use_sent_reps'] \
                                           else prepare_data_sents,
                                      model_options, valid,
                                      use_sent_rep=opt_ds['use_sent_reps'])

    print " Final eval resuts: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
            ' Valid pred std: ', valid_probs.std(), \
            ' Valid error ent: ', error_ent

    params = copy.copy(best_p)

    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
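The gradient clipping used above rescales every gradient by clip_c / ||g|| whenever the joint L2 norm of all gradients exceeds clip_c, which shrinks the step size while keeping the update direction intact. A small numpy check of that rule, outside Theano and with illustrative names:

import numpy as np

def clip_by_global_norm(grads, clip_c):
    # rescale all gradients together when their joint L2 norm exceeds clip_c
    norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    if norm > clip_c:
        return [g * (clip_c / norm) for g in grads]
    return grads

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm = 13
clipped = clip_by_global_norm(grads, clip_c=1.0)
print np.sqrt(sum((g ** 2).sum() for g in clipped))   # ~1.0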
Ejemplo n.º 47
0
def build_fn(args, embeddings):
    """
        Build training and testing functions.
    """
    in_x1 = T.imatrix('x1')
    in_x2 = T.imatrix('x2')
    in_mask1 = T.matrix('mask1')
    in_mask2 = T.matrix('mask2')
    in_l = T.matrix('l')
    in_y = T.ivector('y')

    l_in1 = lasagne.layers.InputLayer((None, None), in_x1)
    l_mask1 = lasagne.layers.InputLayer((None, None), in_mask1)
    l_emb1 = lasagne.layers.EmbeddingLayer(l_in1, args.vocab_size,
                                           args.embedding_size, W=embeddings)

    l_in2 = lasagne.layers.InputLayer((None, None), in_x2)
    l_mask2 = lasagne.layers.InputLayer((None, None), in_mask2)
    l_emb2 = lasagne.layers.EmbeddingLayer(l_in2, args.vocab_size,
                                           args.embedding_size, W=l_emb1.W)

    network1 = nn_layers.stack_rnn(l_emb1, l_mask1, args.num_layers, args.hidden_size,
                                   grad_clipping=args.grad_clipping,
                                   dropout_rate=args.dropout_rate,
                                   only_return_final=(args.att_func == 'last'),
                                   bidir=args.bidir,
                                   name='d',
                                   rnn_layer=args.rnn_layer)

    network2 = nn_layers.stack_rnn(l_emb2, l_mask2, args.num_layers, args.hidden_size,
                                   grad_clipping=args.grad_clipping,
                                   dropout_rate=args.dropout_rate,
                                   only_return_final=True,
                                   bidir=args.bidir,
                                   name='q',
                                   rnn_layer=args.rnn_layer)

    args.rnn_output_size = args.hidden_size * 2 if args.bidir else args.hidden_size

    if args.att_func == 'mlp':
        att = nn_layers.MLPAttentionLayer([network1, network2], args.rnn_output_size,
                                          mask_input=l_mask1)
    elif args.att_func == 'bilinear':
        att = nn_layers.BilinearAttentionLayer([network1, network2], args.rnn_output_size,
                                               mask_input=l_mask1)
    elif args.att_func == 'avg':
        att = nn_layers.AveragePoolingLayer(network1, mask_input=l_mask1)
    elif args.att_func == 'last':
        att = network1
    elif args.att_func == 'dot':
        att = nn_layers.DotProductAttentionLayer([network1, network2], mask_input=l_mask1)
    else:
        raise NotImplementedError('att_func = %s' % args.att_func)

    network = lasagne.layers.DenseLayer(att, args.num_labels,
                                        nonlinearity=lasagne.nonlinearities.softmax)

    if args.pre_trained is not None:
        dic = utils.load_params(args.pre_trained)
        lasagne.layers.set_all_param_values(network, dic['params'], trainable=True)
        del dic['params']
        logging.info('Loaded pre-trained model: %s' % args.pre_trained)
        for dic_param in dic.iteritems():
            logging.info(dic_param)

    logging.info('#params: %d' % lasagne.layers.count_params(network, trainable=True))
    for layer in lasagne.layers.get_all_layers(network):
        logging.info(layer)

    # Test functions
    test_prob = lasagne.layers.get_output(network, deterministic=True) * in_l
    test_prediction = T.argmax(test_prob, axis=-1)
    acc = T.sum(T.eq(test_prediction, in_y))
    test_fn = theano.function([in_x1, in_mask1, in_x2, in_mask2, in_l, in_y], acc)

    # Train functions
    train_prediction = lasagne.layers.get_output(network) * in_l
    train_prediction = train_prediction / \
        train_prediction.sum(axis=1).reshape((train_prediction.shape[0], 1))
    train_prediction = T.clip(train_prediction, 1e-7, 1.0 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(train_prediction, in_y).mean()
    # TODO: lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2)
    params = lasagne.layers.get_all_params(network, trainable=True)

    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'adam':
        updates = lasagne.updates.adam(loss, params)
    elif args.optimizer == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params)
    else:
        raise NotImplementedError('optimizer = %s' % args.optimizer)
    train_fn = theano.function([in_x1, in_mask1, in_x2, in_mask2, in_l, in_y],
                               loss, updates=updates)

    return train_fn, test_fn, params
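Multiplying the softmax output by in_l above restricts predictions to the candidate answers of each example, and the training expression then renormalizes over the remaining labels. A small numpy illustration of that masking step (toy numbers only):

import numpy as np

def constrained_predict(probs, cand_mask):
    # zero out non-candidate labels, renormalize, then take the argmax
    masked = probs * cand_mask
    masked = masked / masked.sum(axis=1, keepdims=True)
    return masked.argmax(axis=-1)

probs = np.array([[0.5, 0.3, 0.2]])
mask = np.array([[0., 1., 1.]])   # label 0 is not a candidate here
print constrained_predict(probs, mask)   # [1]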
Ejemplo n.º 48
0
def main(data_file = '', num_epochs=10, batch_size = 128, L=2, z_dim=256,
        n_hid=1500, binary='false', img_size = 64, init_from = '', save_to='params',
        split_layer='conv7', pxsh = 0.5, specstr = c.pf_cae_specstr,
        cae_weights=c.pf_cae_params, deconv_weights = c.pf_deconv_params):
    binary = binary.lower() == 'true'

    # pre-trained function for extracting convolutional features from images
    cae = m.build_cae(input_var=None, specstr=specstr, shape=(img_size,img_size))
    laydict = dict((l.name, l) for l in nn.layers.get_all_layers(cae))
    convshape = nn.layers.get_output_shape(laydict[split_layer])
    convs_from_img, _ = m.encoder_decoder(cae_weights, specstr=specstr, layersplit=split_layer,
            shape=(img_size, img_size))
    # pre-trained function for returning to images from convolutional features
    img_from_convs = m.deconvoluter(deconv_weights, specstr=specstr, shape=convshape)

    # Create VAE model
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    input_var = T.tensor4('inputs')
    c,w,h = convshape[1], convshape[2], convshape[3]
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid,
                   shape=(w,h), channels=c)

    if len(init_from) > 0:
        print("loading from {}".format(init_from))
        u.load_params(l_x, init_from)
    
    # build loss, updates, training, prediction functions
    loss,_ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
            binary=binary, L=L)

    lr = theano.shared(nn.utils.floatX(1e-5))
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return convs_from_img(floatx_ims)

    print("training for {} epochs".format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=val_fn,
                             tr_transform=lambda x: data_transform(x[0], do_center=False),
                             te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_feats = data_transform(imb, do_center=True)
    reconstructed_feats = ae_fn(orig_feats).reshape(orig_feats.shape)
    orig_feats_deconv = img_from_convs(orig_feats)
    reconstructed_feats_deconv = img_from_convs(reconstructed_feats)
    for i in range(reconstructed_feats_deconv.shape[0]):
        u.get_image_pair(orig_feats_deconv, reconstructed_feats_deconv, index=i, shift=pxsh)\
                .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vae_convs_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vae_convs_{}.npz'.format(hist[-1,-1])))
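u.build_vae_loss is not shown in this excerpt; the usual objective it would implement combines a reconstruction term with the closed-form KL divergence from the approximate posterior to a standard normal prior. A numpy sketch of that KL term only, as an assumption about the textbook form involved:

import numpy as np

def kl_to_standard_normal(mu, log_sigma):
    # closed-form KL( N(mu, sigma^2) || N(0, I) ), summed over latent dimensions
    return 0.5 * np.sum(np.square(mu) + np.exp(2 * log_sigma) - 1 - 2 * log_sigma, axis=-1)

print kl_to_standard_normal(np.zeros(4), np.zeros(4))   # 0.0: posterior equals the prior
print kl_to_standard_normal(np.ones(4), np.zeros(4))    # 2.0: penalty grows with the mean offset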
Ejemplo n.º 49
0
def main(save_to='params', 
         dataset = 'mm',
         kl_loss='true', # use kl-div in z-space instead of mse
         diffs = 'false',
         seq_length = 30,
         num_epochs=1,
         lstm_n_hid=1024,
         max_per_epoch=-1
        ):
    kl_loss = kl_loss.lower() == 'true'
    diffs = diffs.lower() == 'true'

    # set up functions for data pre-processing and model training
    input_var = T.tensor4('inputs')

    # different experimental setups for the moving MNIST vs. Pulp Fiction datasets
    if dataset == 'pf':
        img_size = 64
        cae_weights = c.pf_cae_params
        cae_specstr = c.pf_cae_specstr
        split_layer = 'conv7'
        inpvar = T.tensor4('input')
        net = m.build_cae(inpvar, specstr=cae_specstr, shape=(img_size, img_size))
        convs_from_img,_ = m.encoder_decoder(cae_weights, specstr=cae_specstr,
                layersplit=split_layer, shape=(img_size, img_size), poolinv=True)
        laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net))
        zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer])
        deconv_weights = c.pf_deconv_params
        vae_weights = c.pf_vae_params
        img_from_convs = m.deconvoluter(deconv_weights, specstr=cae_specstr, shape=zdec_in_shape)
        L=2
        vae_n_hid = 1500
        binary = False
        z_dim = 256
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
               m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=vae_n_hid,
                        shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1])
        u.load_params(l_x, vae_weights)
        datafile = 'data/pf.hdf5'
        frame_skip=3 # every 3rd frame in sequence
        z_decode_layer = l_x_mu_list[0]
        pixel_shift = 0.5
        samples_per_image = 4
        tr_batch_size = 16 # must be a multiple of samples_per_image
    elif dataset == 'mm':
        img_size = 64
        cvae_weights = c.mm_cvae_params
        L=2
        vae_n_hid = 1024
        binary = True
        z_dim = 32
        zdec_in_shape = (None, 1, img_size, img_size)
        l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
            m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid, binary=binary,
                       shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1])
        u.load_params(l_x, cvae_weights)
        datafile = 'data/moving_mnist.hdf5'
        frame_skip=1
        w,h=img_size,img_size # of raw input image in the hdf5 file
        z_decode_layer = l_x_list[0]
        pixel_shift = 0
        samples_per_image = 1
        tr_batch_size = 128 # must be a multiple of samples_per_image

    # functions for moving to/from image or conv-space, and z-space
    z_mat = T.matrix('z')
    zenc = theano.function([input_var], nn.layers.get_output(l_z_mu, deterministic=True))
    zdec = theano.function([z_mat], nn.layers.get_output(z_decode_layer, {l_z_mu:z_mat},
        deterministic=True).reshape((-1, zdec_in_shape[1]) + zdec_in_shape[2:]))
    zenc_ls = theano.function([input_var], nn.layers.get_output(l_z_ls, deterministic=True))

    # functions for encoding sequences of z's
    print 'compiling functions'
    z_var = T.tensor3('z_in')
    z_ls_var = T.tensor3('z_ls_in')
    tgt_mu_var = T.tensor3('z_tgt')
    tgt_ls_var = T.tensor3('z_ls_tgt')
    learning_rate = theano.shared(nn.utils.floatX(1e-4))

    # separate function definitions if we are using MSE and predicting only z, or KL divergence
    # and predicting both mean and sigma of z
    if kl_loss:
        def kl(p_mu, p_sigma, q_mu, q_sigma):
            return 0.5 * T.sum(T.sqr(p_sigma)/T.sqr(q_sigma) + T.sqr(q_mu - p_mu)/T.sqr(q_sigma)
                               - 1 + 2*T.log(q_sigma) - 2*T.log(p_sigma))
        lstm, _ = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        z_mu_expr, z_ls_expr = nn.layers.get_output([lstm['output_mu'], lstm['output_ls']])
        z_mu_expr_det, z_ls_expr_det = nn.layers.get_output([lstm['output_mu'],
            lstm['output_ls']], deterministic=True)
        loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr))
        te_loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr_det, T.exp(z_ls_expr_det))
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], loss, 
                updates=updates)
        test_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], te_loss)
    else:
        lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True)
        loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output']),
                tgt_mu_var).mean()
        te_loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output'],
            deterministic=True), tgt_mu_var).mean()
        params = nn.layers.get_all_params(lstm['output'], trainable=True)
        updates = nn.updates.adam(loss, params, learning_rate=learning_rate)
        train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates)
        test_fn = theano.function([z_var, tgt_mu_var], te_loss)

    if dataset == 'pf':
        z_from_img = lambda x: zenc(convs_from_img(x))
        z_ls_from_img = lambda x: zenc_ls(convs_from_img(x))
        img_from_z = lambda z: img_from_convs(zdec(z))
    elif dataset == 'mm':
        z_from_img = zenc
        z_ls_from_img = zenc_ls
        img_from_z = zdec

    # training loop
    print('training for {} epochs'.format(num_epochs))
    nbatch = (seq_length+1) * tr_batch_size * frame_skip / samples_per_image
    data = u.DataH5PyStreamer(datafile, batch_size=nbatch)

    # for taking arrays of uint8 (non square) and converting them to batches of sequences
    def transform_data(ims_batch, center=False):
        imb = u.raw_to_floatX(ims_batch, pixel_shift=pixel_shift,
                center=center)[np.random.randint(frame_skip)::frame_skip]
        zbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX)
        zsigbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX)
        for i in xrange(samples_per_image):
            chunk = tr_batch_size/samples_per_image
            if diffs:
                zf = z_from_img(imb).reshape((chunk, seq_length+1, -1))
                zbatch[i*chunk:(i+1)*chunk, 1:] = zf[:,1:] - zf[:,:-1]
                if kl_loss:
                    zls = z_ls_from_img(imb).reshape((chunk, seq_length+1, -1))
                    zsigbatch[i*chunk:(i+1)*chunk, 1:] = zls[:,1:] - zls[:,:-1]
            else:
                zbatch[i*chunk:(i+1)*chunk] = z_from_img(imb).reshape((chunk, seq_length+1, -1))
                if kl_loss:
                    zsigbatch[i*chunk:(i+1)*chunk] = z_ls_from_img(imb).reshape((chunk,
                        seq_length+1, -1))
        if kl_loss:
            return zbatch[:,:-1,:], zsigbatch[:,:-1,:], zbatch[:,1:,:], zsigbatch[:,1:,:]
        return zbatch[:,:-1,:], zbatch[:,1:,:]

    # we need sequences of images, so we do not shuffle the data during training
    hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn,
                     train_shuffle=False,
                     max_per_epoch=max_per_epoch,
                     tr_transform=lambda x: transform_data(x[0], center=False),
                     te_transform=lambda x: transform_data(x[0], center=True))

    hist = np.asarray(hist)
    u.save_params(lstm['output'], os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1,-1])))

    # build functions to sample from LSTM
    # separate cell_init and hid_init from the other learned model parameters
    all_param_values = nn.layers.get_all_param_values(lstm['output'])
    init_indices = [i for i,p in enumerate(nn.layers.get_all_params(lstm['output']))
            if 'init' in str(p)]
    init_values = [all_param_values[i] for i in init_indices]
    params_noinit = [p for i,p in enumerate(all_param_values) if i not in init_indices]

    # build model without learnable init values, and load non-init parameters
    if kl_loss:
        lstm_sample, state_vars = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid,
                training=False)
    else:
        lstm_sample, state_vars = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=False)
    nn.layers.set_all_param_values(lstm_sample['output'], params_noinit)

    # extract the layers representing the hidden and cell states, and have sample_fn
    # return their outputs
    state_layers_keys = [k for k in lstm_sample.keys() if 'hidfinal' in k or 'cellfinal' in k]
    state_layers_keys = sorted(state_layers_keys)
    state_layers_keys = sorted(state_layers_keys, key = lambda x:int(x.split('_')[1]))
    state_layers = [lstm_sample[s] for s in state_layers_keys]
    if kl_loss:
        sample_fn = theano.function([z_var, z_ls_var] + state_vars,
                nn.layers.get_output([lstm_sample['output_mu'], lstm_sample['output_ls']] + state_layers,
                    deterministic=True))
    else:
        sample_fn = theano.function([z_var] + state_vars,
                nn.layers.get_output([lstm_sample['output']] + state_layers, deterministic=True))

    from images2gif import writeGif
    from PIL import Image

    # sample approximately 30 different generated video sequences
    te_stream = data.streamer(training=True, shuffled=False)
    interval = data.ntrain / data.batch_size / 30
    for idx,imb in enumerate(te_stream.get_epoch_iterator()):
        if idx % interval != 0:
            continue
        z_tup = transform_data(imb[0], center=True)
        seg_idx = np.random.randint(z_tup[0].shape[0])
        if kl_loss:
            z_in, z_ls_in = z_tup[0], z_tup[1]
            z_last, z_ls_last = z_in[seg_idx:seg_idx+1], z_ls_in[seg_idx:seg_idx+1]
            z_vars = [z_last, z_ls_last]
        else:
            z_in = z_tup[0]
            z_last = z_in[seg_idx:seg_idx+1]
            z_vars = [z_last]
        images = []
        state_values = [np.dot(np.ones((z_last.shape[0],1), dtype=theano.config.floatX), s)
                for s in init_values]
        output_list = sample_fn(*(z_vars + state_values))

        # use whole sequence of predictions for output
        z_pred = output_list[0]
        state_values = output_list[2 if kl_loss else 1:]

        rec = img_from_z(z_pred.reshape(-1, z_dim))
        for k in xrange(rec.shape[0]):
            images.append(Image.fromarray(u.get_picture_array(rec, index=k, shift=pixel_shift)))
        k += 1
        # slice prediction to feed into lstm
        z_pred = z_pred[:,-1:,:]
        if kl_loss:
            z_ls_pred = output_list[1][:,-1:,:]
            z_vars = [z_pred, z_ls_pred]
        else:
            z_vars = [z_pred]
        for i in xrange(30): # predict 30 frames after the end of the priming video
            output_list = sample_fn(*(z_vars + state_values))
            z_pred = output_list[0]
            state_values = output_list[2 if kl_loss else 1:]
            rec = img_from_z(z_pred.reshape(-1, z_dim))
            images.append(Image.fromarray(u.get_picture_array(rec, index=0, shift=pixel_shift)))
            if kl_loss:
                z_ls_pred = output_list[1]
                z_vars = [z_pred, z_ls_pred]
            else:
                z_vars = [z_pred]
        writeGif("sample_{}.gif".format(idx),images,duration=0.1,dither=0)