Example #1
def readdata_nosplit_scaled_subject(input_size, subjects, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata_scaled.hdf5')
    testlabelData = ddio.load('dataset/labeldata_scaled.hdf5')
    indexData = ddio.load('dataset/index_scaled.hdf5')

    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    #np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    subjectLabel = (np.array(pd.DataFrame(indexData)[1]))
    print("==============")
    print(subjectLabel)
    nums = [
        '100', '101', '103', '105', '106', '107', '108', '109', '111', '112',
        '113', '115', '116', '117', '118', '119', '121', '122', '123', '124',
        '200', '201', '202', '203', '205', '207', '208', '209', '210', '212',
        '213', '214', '215', '217', '219', '220', '221', '222', '223', '228',
        '230', '231', '232', '233', '234'
    ]
    num_index = 0
    group = []
    for x in subjectLabel:
        for beat in range(x):
            group.append(nums[num_index])
        num_index += 1
    #group = np.array(group)
    return (X, y, group)
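
The group-building loop above simply repeats each record ID once per beat; a self-contained way to see the same result is np.repeat (the record IDs and beat counts below are made up for illustration):

import numpy as np

nums = ['100', '101', '103']          # hypothetical record IDs
beat_counts = np.array([2, 3, 1])     # hypothetical beats per record
group = np.repeat(nums, beat_counts)
print(group)                          # ['100' '100' '101' '101' '101' '103']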
Example #2
    def parse_kmap(self, key='coeffs'):
        """ Retrieve the parameters to construct the momentum conversion function.
        """

        self.parse_bfile()
        self.fr, self.fc = dio.load(self.kfile)['calibration'][key]
        self.xcent, self.ycent = dio.load(self.kfile)['pcent']
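
parse_kmap only works if the calibration file has the nested layout it indexes into; a minimal sketch of writing such a file with deepdish (the file name and numeric values are placeholders, not real calibration data) could look like this:

import numpy as np
import deepdish.io as dio

calib = {
    'calibration': {'coeffs': np.array([1.2, 0.8])},   # unpacked into (fr, fc)
    'pcent': np.array([256.0, 256.0]),                 # unpacked into (xcent, ycent)
}
dio.save('kfile_example.h5', calib)

loaded = dio.load('kfile_example.h5')
fr, fc = loaded['calibration']['coeffs']
xcent, ycent = loaded['pcent']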
Example #3
def readdata_nosplit(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata.hdf5')
    testlabelData = ddio.load('dataset/labeldata.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    return (X, y)
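
The concatenate/shuffle/split idiom above keeps each row of X aligned with its label while shuffling; an equivalent sketch that shuffles a permutation of row indices instead (toy data, for illustration only):

import numpy as np

rng = np.random.default_rng(0)
X = np.arange(12, dtype=np.float32).reshape(6, 2)   # toy features
y = np.arange(6, dtype=np.float32).reshape(6, 1)    # toy labels
perm = rng.permutation(len(X))
X_shuffled, y_shuffled = X[perm], y[perm]           # rows stay paired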
Example #4
def loaddata(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/train.hdf5')
    testlabelData = ddio.load('dataset/trainlabel.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    valData = ddio.load('dataset/test.hdf5')
    vallabelData = ddio.load('dataset/testlabel.hdf5')
    Xval = np.float32(valData[feature])
    yval = np.float32(vallabelData[feature])
    return (X, y, Xval, yval)
Example #5
def compute_roc_auc_from_sim(category, path_sim_matrix, is_quiet=False):
    """
    Args:
      category: category name
      path_sim_matrix: path to the similarity matrix
      is_quiet: if False output extra information
    Returns:
      roc_auc: average ROC AUC for all labeled anchors.
    """

    if not is_quiet:
        print('Sim matrix path:', path_sim_matrix)
    try:
        sim = scipy.io.loadmat(path_sim_matrix)
    except NotImplementedError:
        # matlab v7.3 file
        sim = dio.load(path_sim_matrix)

    labels_path = join(HDF5_LABELS_DIR, 'labels_{}.hdf5'.format(category))
    with h5py.File(labels_path, mode='r') as f:
        d = covert_labels_to_dict(f)

    roc_auc, roc_auc_list = compute_roc(d, sim)
    print('{} n_anchors: {} ROC_AUC: {:.3f}'.format(
        category, len(d['anchors']), roc_auc))
    return roc_auc
Example #6
def loaddata_LOGO(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    trainData = ddio.load('dataset/targetdata.hdf5')
    testlabelData = ddio.load('dataset/labeldata.hdf5')
    indexData = ddio.load('dataset/index.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]
    import pandas as pd
    subjectLabel = (np.array(pd.DataFrame(indexData)[1]))
    group = []
    for x in subjectLabel:
        for beat in range(x):
            group.append(x)
    group = np.array(group)
    return (X, y, group)
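
The group array built here is intended as the per-sample groups argument for leave-one-group-out cross-validation; a hypothetical usage sketch with scikit-learn (the dataset/*.hdf5 files are assumed to exist, and input_size and the 'MLII' feature key are placeholders) would be:

from sklearn.model_selection import LeaveOneGroupOut

X, y, group = loaddata_LOGO(input_size=256, feature='MLII')
logo = LeaveOneGroupOut()
for train_idx, test_idx in logo.split(X, y, groups=group):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    # fit and evaluate a model on each held-out group here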
Example #7
    def read_saved_state(self, continuing=False):
        """
        Read a saved state of the sampler from disk.

        The required information to reconstruct the state of the run is read
        from an hdf5 file.
        This currently adds the whole chain to the sampler.
        We then remove the old checkpoint and write all unnecessary items back
        to disk.
        FIXME: Load only the necessary quantities, rather than read/write?

        Parameters
        ----------
        sampler: `dynesty.NestedSampler`
            NestedSampler instance to reconstruct from the saved state.
        continuing: bool
            Whether the run is continuing or terminating, if True, the loaded
            state is mostly written back to disk.
        """
        resume_file = '{}/{}_resume.h5'.format(self.outdir, self.label)

        if os.path.isfile(resume_file):
            saved = load(resume_file)

            self.sampler.saved_u = list(saved['unit_cube_samples'])
            self.sampler.saved_v = list(saved['physical_samples'])
            self.sampler.saved_logl = list(saved['sample_likelihoods'])
            self.sampler.saved_logvol = list(saved['sample_log_volume'])
            self.sampler.saved_logwt = list(saved['sample_log_weights'])
            self.sampler.saved_logz = list(saved['cumulative_log_evidence'])
            self.sampler.saved_logzvar = list(
                saved['cumulative_log_evidence_error'])
            self.sampler.saved_id = list(saved['id'])
            self.sampler.saved_it = list(saved['it'])
            self.sampler.saved_nc = list(saved['nc'])
            self.sampler.saved_boundidx = list(saved['boundidx'])
            self.sampler.saved_bounditer = list(saved['bounditer'])
            self.sampler.saved_scale = list(saved['scale'])
            self.sampler.saved_h = list(saved['cumulative_information'])
            self.sampler.ncall = saved['ncall']
            self.sampler.live_logl = list(saved['live_logl'])
            self.sampler.it = saved['iteration'] + 1
            self.sampler.live_u = saved['live_u']
            self.sampler.live_v = saved['live_v']
            self.sampler.nlive = saved['nlive']
            self.sampler.live_bound = saved['live_bound']
            self.sampler.live_it = saved['live_it']
            self.sampler.added_live = saved['added_live']
            self._remove_checkpoint()
            if continuing:
                self.write_current_state()
            return True

        else:
            return False
Example #8
 def load(cls, path):
     if path is None:
         return cls.load_from_dict({})
     else:
         d = io.load(path)
         # Check class type
         class_name = d.get('name')
         if class_name is not None:
             return cls.getclass(class_name).load_from_dict(d)
         else:
             return cls.load_from_dict(d)
Example #9
 def load(cls, path):
     if path is None:
         return cls.load_from_dict({})
     else:
         d = io.load(path)
         # Check class type
         class_name = d.get("name")
         if class_name is not None:
             return cls.getclass(class_name).load_from_dict(d)
         else:
             return cls.load_from_dict(d)
Example #10
def existing_file_background(filepath):
    """ Returns a numpy array from an image stored at filepath
    """
    if filepath.endswith(".h5"):
        return dio.load(filepath)
    else:
        # If using OpenCV, we have to get RGB, not BGR
        try:
            return cv2.imread(filepath)[:, :, [2, 1, 0]]
        except TypeError:
            log = logging.getLogger()
            log.info("Could nor load " + filepath)
            return np.zeros((10, 10), dtype=np.uint8)
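
The [:, :, [2, 1, 0]] fancy index above is what converts OpenCV's BGR channel order to RGB; a tiny numpy-only illustration of that indexing:

import numpy as np

bgr = np.zeros((2, 2, 3), dtype=np.uint8)
bgr[..., 0] = 255                 # blue channel (index 0 in BGR order)
rgb = bgr[:, :, [2, 1, 0]]        # reverse the channel axis
assert rgb[0, 0, 2] == 255        # blue now sits in the last slot (B of RGB)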
Example #11
def readdata_nosplit(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')
    #trainData = ddio.load('dataset/targetdata_std.hdf5')
    #testlabelData= ddio.load('dataset/labeldata_std.hdf5')
    #indexData= ddio.load('dataset/index_std.hdf5')
    trainData = ddio.load('dataset/targetdata_debug.hdf5')
    testlabelData= ddio.load('dataset/labeldata_debug.hdf5')
    indexData= ddio.load('dataset/index_debug.hdf5')
    X = np.float32(trainData[feature])
    y = np.float32(testlabelData[feature])
    att = np.concatenate((X,y), axis=1)
    np.random.shuffle(att)
    X , y = att[:,:input_size], att[:, input_size:]
    subjectLabel = (np.array(pd.DataFrame(indexData)[1]))
    group = []
    for x in subjectLabel:
        for beat in range(x):    
            group.append(x)
    group = np.array(group)
    print(np.unique(group, return_counts = True))
    return (X, y, group)
Example #12
def test():
    '''
    '''
    data = io.load(open('test_data.h5', 'rb'))
    #data = remove_tau(data)

    # -- Load scikit classifier
    classifier = joblib.load('sklBDT_trk2.pkl')
    
    # -- Get classifier predictions
    yhat = classifier.predict_proba(data['X'])[:, 2]

    io.save(open('yhat_test.h5', 'wb'), yhat)
Example #13
def loaddata(input_size, feature):
    import deepdish.io as ddio
    mkdir_recursive('dataset')

    data = ddio.load('dataset/targetdata.hdf5')
    label = ddio.load('dataset/labeldata.hdf5')
    X = np.float32(data[feature])
    y = np.float32(label[feature])
    att = np.concatenate((X, y), axis=1)
    np.random.shuffle(att)
    X, y = att[:, :input_size], att[:, input_size:]

    from sklearn.model_selection import train_test_split
    X, Xval, y, yval = train_test_split(X, y, test_size=0.3, random_state=1)

    # trainData = ddio.load('dataset/train.hdf5')
    # testlabelData= ddio.load('dataset/trainlabel.hdf5')
    # X = np.float32(trainData[feature])
    # y = np.float32(testlabelData[feature])
    # att = np.concatenate((X,y), axis=1)
    # np.random.shuffle(att)
    # X , y = att[:,:input_size], att[:, input_size:]

    return (X, y, Xval, yval)
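
A hypothetical call (input_size and the 'MLII' feature key are placeholders, and the dataset/*.hdf5 files are assumed to exist):

X, y, Xval, yval = loaddata(input_size=256, feature='MLII')
print(X.shape, y.shape, Xval.shape, yval.shape)   # 70/30 train/validation split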
Example #14
 def __init__(self, Ps, Es, Vs=None):
     if type(Ps) == str:
         self.Psfname = Ps
         self.p = dd.load(Ps)
     else:
         self.Psfname = None
         self.p = Ps
     self.setup = Es
     self.setup['nvar'] = 2
     #        self.Vs=Vs
     self.verbose = Es['verbose']
     if self.verbose:
         start = time.time()
     self.set_equations()
     self.dt = Es['dt']
     self.time_elapsed = 0
     if self.setup['setPDE']:
         self.rhs = self.rhs_pde
         self.p['nd'] = len(Es['n'])
         if self.p['nd'] == 2:
             self.p['nx'], self.p['ny'] = Es['n']
             self.p['lx'], self.p['ly'] = Es['l']
             self.l = [self.p['lx'], self.p['ly']]
             self.n = [self.p['nx'], self.p['ny']]
             self.dg = tuple([l / float(n) for l, n in zip(self.l, self.n)])
             self.dx = self.dg[0]
         elif self.p['nd'] == 1:
             self.dg = [Es['l'][0] / float(Es['n'][0])]
             self.dx = self.dg[0]
         self.dx2 = self.dx**2
         self.dt = Es['dt'] * self.dx2 / self.p['delta_s']
         self.X = np.linspace(0, Es['l'][0], Es['n'][0])
         from utilities.laplacian_sparse import create_laplacian  #,create_gradient
         self.lapmat = create_laplacian(
             self.setup['n'],
             self.setup['l'],
             self.setup['bc'], [1.0, self.p['delta_s'], self.p['delta_s']],
             verbose=self.verbose)
         #            self.gradmat=create_gradient(self.setup['n'],self.setup['l'], self.setup['bc'] , [1.0,self.p['Dw'],self.p['Dh']])
         if self.verbose:
             print("Laplacian created")
     else:
         self.rhs = self.rhs_ode
     self.set_integrator()
     if Vs is not None:
         self.setup_initial_condition(Vs)
     if self.verbose:
         print("Time to setup: ", time.time() - start)
Example #15
 def batch(paths, iptagger, batch_size, random=True):
     while True:
         if random:
             np.random.shuffle(paths)
         for fp in paths:
             d = io.load(fp)
             X = np.concatenate([d['X'], d[iptagger + '_vars']], axis=1)
             le = LabelEncoder()
             y = le.fit_transform(d['y'])
             w = d['w']
             if random:
                 ix = np.arange(X.shape[0])
                 np.random.shuffle(ix)
                 X, y, w = X[ix], y[ix], w[ix]
             for i in range(int(np.ceil(X.shape[0] / float(batch_size)))):
                 yield (X[(i * batch_size):((i + 1) * batch_size)],
                        y[(i * batch_size):((i + 1) * batch_size)],
                        w[(i * batch_size):((i + 1) * batch_size)])
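
The generator above never terminates, so it is meant to be consumed lazily; an illustrative pull of a single mini-batch (the chunk file names and the 'ip2d' tagger key are assumptions):

gen = batch(['train_chunk_0.h5', 'train_chunk_1.h5'], iptagger='ip2d', batch_size=256)
X_batch, y_batch, w_batch = next(gen)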
Example #16
    def parse_bfile(self):
        """ Retrieve the binning parameters.
        """

        binDict = dio.load(self.bfile)
        binaxes = list(
            map(lambda x: x.decode('utf-8'), binDict['binaxes'].tolist()))
        binranges = binDict['binranges'].tolist()
        binsteps = binDict['binsteps'].tolist()

        # Retrieve the binning steps along X and Y axes
        self.xstep = self.listfind(binaxes, 'X', binsteps)
        self.ystep = self.listfind(binaxes, 'Y', binsteps)

        # Retrieve the binning ranges (br) along X and Y axes
        self.xbr_start, self.xbr_end = self.listfind(binaxes, 'X', binranges)
        self.ybr_start, self.ybr_end = self.listfind(binaxes, 'Y', binranges)
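
parse_bfile assumes the binning file stores byte-string axis names alongside per-axis steps and ranges; a sketch of writing a compatible file with deepdish (the axis names and values are placeholders) could be:

import numpy as np
import deepdish.io as dio

binning = {
    'binaxes': np.array([b'X', b'Y', b'E']),                        # decoded to 'X', 'Y', 'E'
    'binsteps': np.array([0.5, 0.5, 0.05]),                         # one step per axis
    'binranges': np.array([[-1.0, 1.0], [-1.0, 1.0], [0.0, 2.0]]),  # (start, end) per axis
}
dio.save('bfile_example.h5', binning)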
Example #17
    def __init__(self,Ps,Es,Vs=None):
        if type(Ps)==str:
            self.Psfname=Ps
            if Ps.endswith('csv'):
                import pandas as pd
                self.p = pd.read_csv(Ps, index_col=0, squeeze=True).to_dict()
            elif Ps.endswith('hdf'):
                self.p=dd.load(Ps)
        else:
            self.Psfname=None
            self.p = Ps
        self.setup=Es
#        self.Vs=Vs
        self.verbose=Es['verbose']
        if self.verbose:
            start=time.time()
        self.set_equations()
        self.dt = 0.1
        self.time_elapsed = 0
        if self.setup['setPDE']:
            self.p['nd']=len(Es['n'])
            if self.p['nd']==2:
                self.p['nx'],self.p['ny']=Es['n']
                self.p['lx'],self.p['ly']=Es['l']
                self.l=[self.p['lx'],self.p['ly']]
                self.n=[self.p['nx'],self.p['ny']]
                self.dg  = tuple([l/float(n) for l,n in zip(self.l,self.n)])
                self.dx  = self.dg[0]
            elif self.p['nd']==1:
                self.dg=[Es['l'][0]/float(Es['n'][0])]
                self.dx=self.dg[0]
            self.dx2 = self.dx**2
            self.dt=Es['dt']*self.dx2 / np.amax(self.diffusion_coeffs)
            self.X = np.linspace(0,Es['l'][0],Es['n'][0])
            from utilities.laplacian_sparse import create_laplacian #,create_gradient
            self.lapmat=create_laplacian(self.setup['n'],self.setup['l'], self.setup['bc'] , self.diffusion_coeffs,verbose=self.verbose)
#            self.gradmat=create_gradient(self.setup['n'],self.setup['l'], self.setup['bc'] , [1.0,self.p['Dw'],self.p['Dh']])
            self.set_integrator()
            if self.verbose:
                print("Laplacian created")
        if Vs is not None:
            self.setup_initial_condition(Vs)
        if self.verbose:
            print("Time to setup: ",time.time()-start)
Example #18
def main():
    opts = parse_options()

    inFile = opts.inputFile
    tree = opts.treeName

    df = root2pandas(inFile, tree)
    # -- save a pandas df to hdf5 (better to first convert it back to ndarray, to be fair)

    import deepdish.io as io
    outFile = inFile.replace(".root", ".h5")
    io.save(outFile, df)

    # -- let's load it back in to make sure it actually worked!
    new_df = io.load(outFile)

    # -- check the shape again -- nice check to run every time you create a df
    print "File check!"
    print "(Number of events, Number of branches): ", new_df.shape
Example #19
def integrate_from_steady_state(init_cond, alpha, Tmax, ito, idx_finish, step,
                                version):
    Ps = dd.load(Ps_normal)
    Ps['Tmax'] = Tmax
    Ps['alpha'] = 0.0
    Es = Es_normal.copy()
    Es['rhs'] = version
    m = BenincaModel(Es=Es, Ps=Ps, Vs=None)
    if init_cond == 0:
        init_cond = calc_for_constant(m)
    elif init_cond == 1:
        init_cond = np.array([0.103, 0.019, 0.033, 0.040])
    elif init_cond == 2:
        init_cond = np.array([0.8, 0.1, 0.05, 0.1])
    print("Initial condition:", init_cond)
    print("Integrating with SDEINT")
    tspan, result, forcing = calc_for_oscillation_with_Ito(
        m, init_cond, alpha, Tmax, ito, idx_finish, step)
    forcing_tspan = m.Ft(tspan)
    return tspan, result, forcing, forcing_tspan
Example #20
 def batch(paths, iptagger, batch_size, random=True):
     while True:
         if random:
             np.random.shuffle(paths)
         for fp in paths:
             d = io.load(fp)
             X = np.concatenate([d['X'], d[iptagger + '_vars']], axis=1)
             le = LabelEncoder()
             y = le.fit_transform(d['y'])
             w = d['w']
             if random:
                 ix = np.arange(X.shape[0])
                 np.random.shuffle(ix)
                 X, y, w = X[ix], y[ix], w[ix]
             for i in range(int(np.ceil(X.shape[0] / float(batch_size)))):
                 yield (X[(i * batch_size):((i + 1) * batch_size)],
                        y[(i * batch_size):((i + 1) * batch_size)],
                        w[(i * batch_size):((i + 1) * batch_size)])
Example #21
def train():
    '''
    '''
    data = io.load(open('train_data.h5', 'rb'))
    #data = remove_tau(data)
    
    if CROSS_VAL:
        param_grid = {'n_estimators':[50, 100], 'max_depth':[3, 5, 10], 'min_samples_split':[2, 5]}
        fit_params = {
                         'sample_weight' : data['w'],
                     }
        metaclassifier = GridSearchCV(GradientBoostingClassifier(), param_grid=param_grid, fit_params=fit_params, \
            cv=2, n_jobs=4, verbose=2)#, scoring=roc_score)
        metaclassifier.fit(data['X'], data['y'])
        classifier = metaclassifier.best_estimator_
        print('Best classifier:', metaclassifier.best_params_)

    else:
        classifier = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=10, verbose=1)
        classifier.fit(data['X'], data['y'], sample_weight=data['w'])

    joblib.dump(classifier, 'sklBDT_trk2.pkl', protocol=cPickle.HIGHEST_PROTOCOL)
Example #22
    def load(cls, path):
        """
        Loads an instance of the class from a file.

        Parameters
        ----------
        path : str
            Path to an HDF5 file.

        Examples
        --------
        This is an abstract data type, but let us say that ``Foo`` inherits
        from ``Saveable``. To construct an object of this class from a file, we
        do:

        >>> foo = Foo.load('foo.h5') #doctest: +SKIP
        """
        if path is None:
            return cls.load_from_dict({})
        else:
            d = io.load(path)
            return cls.load_from_dict(d)
Example #23
    def load(cls, path):
        """
        Loads an instance of the class from a file.

        Parameters
        ----------
        path : str
            Path to an HDF5 file.

        Examples
        --------
        This is an abstract data type, but let us say that ``Foo`` inherits
        from ``Saveable``. To construct an object of this class from a file, we
        do:

        >>> foo = Foo.load('foo.h5') #doctest: +SKIP
        """
        if path is None:
            return cls.load_from_dict({})
        else:
            d = io.load(path)
            return cls.load_from_dict(d)
Example #24
def main(embed_size, normed, input_id, run_name):

    configure_logging()
    logger = logging.getLogger("RNNIP Training")

    logger.info("Loading hdf5's")
    test_dict = io.load(os.path.join('data', 'test_dict_' + input_id + '.h5'))
    train_dict = io.load(os.path.join('data',
                                      'train_dict_' + input_id + '.h5'))

    X_train_stream0 = train_dict['grade']
    X_train_stream1 = train_dict['X']
    y_train = train_dict['y']

    X_test_stream0 = test_dict['grade']
    X_test_stream1 = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d']

    logger.info('Building model')
    model = build_model(X_train_stream0, X_train_stream1, embed_size, normed)
    model.summary()

    logger.info('Compiling model')
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    #-- if the pre-trained model exists, load it in, otherwise start from scratch
    safe_mkdir('weights')
    weights_file = os.path.join('weights', 'rnnip_' + run_name + '.h5')
    try:
        model.load_weights(weights_file)
        logger.info('Loaded pre-trained model from ' + weights_file)
    except IOError:
        logger.info('No pre-trained model found in ' + weights_file)

    logger.info('Training:')
    try:
        model.fit([X_train_stream0, X_train_stream1],
                  y_train,
                  batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True,
                                    patience=20,
                                    monitor='val_loss'),
                      ModelCheckpoint(weights_file,
                                      monitor='val_loss',
                                      verbose=True,
                                      save_best_only=True)
                  ],
                  epochs=300,
                  validation_split=0.2)

    except KeyboardInterrupt:
        logger.info('Training ended early.')

    # -- load in best network
    logger.info('Loading best epoch')
    model.load_weights(weights_file)

    json_string = model.to_json()
    safe_mkdir('json_models')
    open(os.path.join('json_models', run_name + '.json'),
         'w').write(json_string)

    logger.info('Testing')
    safe_mkdir('predictions')
    yhat = model.predict([X_test_stream0, X_test_stream1],
                         verbose=True,
                         batch_size=10000)
    io.save(os.path.join('predictions', 'yhat' + run_name + '.h5'), yhat)

    logger.info('Plotting ROC')
    plot_ROC(y_test, yhat, ip3d, run_name)
Example #25
def _fit_and_score_ckpt(workdir=None,
                        checkpoint=True,
                        force_refresh=False,
                        **fit_and_score_kwargs):
    """Fit estimator and compute scores for a given dataset split.

    This function wraps
    :func:`sklearn:sklearn.model_selection._validation._fit_and_score`,
    while also saving checkpoint files containing the estimator, parameters,
    and scores. This is useful if fitting and scoring is costly or if it is being
    performed within a large cross-validation experiment.

    To avoid collisions with scores computed for other CV splits, this
    function computes a hash from a nested dictionary containing all keyword
    arguments as well as estimator parameters. It then saves the scores and
    parameters in <hash>_params.h5 and the estimator itself in
    <hash>_estimator.pkl

    Parameters
    ----------
    workdir : path-like object, default=None
        A string or :term:`python:path-like-object` indicating the directory
        in which to store checkpoint files

    checkpoint : bool, default=True
        If True, checkpoint the parameters, estimators, and scores.

    force_refresh : bool, default=False
        If True, recompute scores even if the checkpoint file already exists.
        Otherwise, load scores from checkpoint files and return.

    **fit_and_score_kwargs : kwargs
        Key-word arguments passed to
        :func:`sklearn:sklearn.model_selection._validation._fit_and_score`

    Returns
    -------
    train_scores : dict of scorer name -> float
        Score on training set (for all the scorers),
        returned only if `return_train_score` is `True`.

    test_scores : dict of scorer name -> float
        Score on testing set (for all the scorers).

    n_test_samples : int
        Number of test samples.

    fit_time : float
        Time spent for fitting in seconds.

    score_time : float
        Time spent for scoring in seconds.

    parameters : dict or None
        The parameters that have been evaluated.

    estimator : estimator object
        The fitted estimator
    """
    if not checkpoint:
        return _fit_and_score(**fit_and_score_kwargs)

    if workdir is None:
        raise ValueError(
            "If checkpoint is True, you must supply a working directory "
            "through the ``workdir`` argument.")

    estimator = fit_and_score_kwargs.pop("estimator", None)
    estimator_params = _serialize_estimator_params(estimator.get_params())
    all_params = {
        "estimator_params": estimator_params,
        "fit_and_score_kwargs": fit_and_score_kwargs,
    }

    cv_hash = hashlib.md5(
        json.dumps(all_params, sort_keys=True, ensure_ascii=True,
                   default=str).encode()).hexdigest()

    h5_file = os.path.join(workdir, cv_hash + "_params.h5")
    pkl_file = os.path.join(workdir, cv_hash + "_estimator.pkl")

    if not force_refresh and os.path.exists(h5_file):
        ckpt_dict = ddio.load(h5_file)

        scores = ckpt_dict["scores"]

        if fit_and_score_kwargs.get("return_estimator", False):
            with open(pkl_file, "rb") as fp:
                estimator = pickle.load(fp)

            scores.append(estimator)

        return scores
    else:
        scores = _fit_and_score(estimator, **fit_and_score_kwargs)
        os.makedirs(workdir, exist_ok=True)
        if fit_and_score_kwargs.get("return_estimator", False):
            estimator = scores[-1]
            with open(pkl_file, "wb") as fp:
                pickle.dump(estimator, fp)

            ckpt_scores = scores[:-1]
            if isinstance(estimator, Pipeline):
                model = estimator.steps[-1]
            else:
                model = estimator

            estimator_params = _serialize_estimator_params(
                estimator.get_params())
            fitted_params = {
                "alpha_": getattr(model, "alpha_", None),
                "alphas_": getattr(model, "alphas_", None),
                "l1_ratio_": getattr(model, "l1_ratio_", None),
                "mse_path_": getattr(model, "mse_path_", None),
                "scoring_path_": getattr(model, "scoring_path_", None),
                "intercept_": getattr(model, "intercept_", None),
                "coef_": getattr(model, "coef_", None),
            }
        else:
            estimator_params = None
            fitted_params = None
            ckpt_scores = scores

        fit_and_score_kwargs.pop("X")
        fit_and_score_kwargs.pop("y")

        if "scorer" in fit_and_score_kwargs:
            fit_and_score_kwargs["scorer"] = list(
                fit_and_score_kwargs["scorer"].keys())

        ckpt_dict = {
            "scores": ckpt_scores,
            "fit_and_score_kwargs": fit_and_score_kwargs,
            "estimator_params": estimator_params,
            "fitted_params": fitted_params,
        }

        ddio.save(h5_file, ckpt_dict)
        return scores
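
The checkpoint naming above is driven purely by an MD5 hash of the serialized parameters; a minimal standalone sketch of that scheme (the parameter values and directory are made up):

import hashlib
import json
import os

all_params = {
    "estimator_params": {"alpha": 0.1, "l1_ratio": 0.5},
    "fit_and_score_kwargs": {"scorer": "r2", "return_train_score": True},
}
cv_hash = hashlib.md5(
    json.dumps(all_params, sort_keys=True, ensure_ascii=True, default=str).encode()
).hexdigest()
h5_file = os.path.join("checkpoints", cv_hash + "_params.h5")
pkl_file = os.path.join("checkpoints", cv_hash + "_estimator.pkl")
print(h5_file, pkl_file)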
Example #26
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=("Look inside HDF5 files. Works particularly well "
                     "for HDF5 files saved with deepdish.io.save()."),
        prog='ddls',
        epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+', help='filename of HDF5 file')
    parser.add_argument('-d',
                        '--depth',
                        type=int,
                        default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc',
                        '--no-color',
                        action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i',
                        '--inspect',
                        metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython',
                        action='store_true',
                        help=('loads file into an IPython session.'
                              'Works with -i'))
    parser.add_argument('--raw',
                        action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument(
        '-f',
        '--filter',
        type=str,
        help=('Print only entries that match this regular expression'))
    parser.add_argument('-l',
                        '--leaves-only',
                        action='store_true',
                        help=('Only print leaves'))
    parser.add_argument('-s',
                        '--summarize',
                        action='store_true',
                        help=('Print summary statistics of numpy arrays'))
    parser.add_argument('-c',
                        '--compression',
                        action='store_true',
                        help=('Print compression method for each array'))
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='deepdish {} (io protocol {})'.format(
                            __version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    settings = {}
    if args.filter:
        settings['filter'] = args.filter

    if args.leaves_only:
        settings['leaves-only'] = True

    if args.summarize:
        settings['summarize'] = True

    if args.compression:
        settings['compression'] = True

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc, paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc, paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)

        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            # State that will be incremented
            settings['filtered_count'] = 0

            s = get_tree(f, raw=args.raw, settings=settings)
            if s is not None:
                if i > 0:
                    print()

                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize,
                        max_level=args.depth,
                        settings=settings)
                i += 1

            if settings.get('filter'):
                print('Filtered on: {} ({} rows omitted)'.format(
                    paint(args.filter, 'purple', colorize=colorize),
                    paint(str(settings['filtered_count']),
                          'white',
                          colorize=colorize)))
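
What ddls somefile.h5 -i /grp prints can also be reproduced programmatically, since deepdish's load accepts a group path; a small sketch (the file name and contents are placeholders):

import numpy as np
import deepdish as dd

dd.io.save('ddls_example.h5', {'foo': {'bar': np.arange(3)}, 'baz': 'qux'})
print(dd.io.load('ddls_example.h5', '/foo'))   # loads only the /foo subgroup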
Example #27
    def write_current_state(self):
        """
        Write the current state of the sampler to disk.

        The required information to reconstruct the state of the run is written
        to an hdf5 file.
        All but the most recent removed live point in the chain are removed from
        the sampler to reduce memory usage.
        This means it is necessary not to append the first live point to the
        file if updating a previous checkpoint.

        Parameters
        ----------
        sampler: `dynesty.NestedSampler`
            NestedSampler to write to disk.
        """
        check_directory_exists_and_if_not_mkdir(self.outdir)
        resume_file = '{}/{}_resume.h5'.format(self.outdir, self.label)

        if os.path.isfile(resume_file):
            saved = load(resume_file)

            current_state = dict(
                unit_cube_samples=np.vstack(
                    [saved['unit_cube_samples'], self.sampler.saved_u[1:]]),
                physical_samples=np.vstack(
                    [saved['physical_samples'], self.sampler.saved_v[1:]]),
                sample_likelihoods=np.concatenate(
                    [saved['sample_likelihoods'],
                     self.sampler.saved_logl[1:]]),
                sample_log_volume=np.concatenate([
                    saved['sample_log_volume'], self.sampler.saved_logvol[1:]
                ]),
                sample_log_weights=np.concatenate([
                    saved['sample_log_weights'], self.sampler.saved_logwt[1:]
                ]),
                cumulative_log_evidence=np.concatenate([
                    saved['cumulative_log_evidence'],
                    self.sampler.saved_logz[1:]
                ]),
                cumulative_log_evidence_error=np.concatenate([
                    saved['cumulative_log_evidence_error'],
                    self.sampler.saved_logzvar[1:]
                ]),
                cumulative_information=np.concatenate([
                    saved['cumulative_information'], self.sampler.saved_h[1:]
                ]),
                id=np.concatenate([saved['id'], self.sampler.saved_id[1:]]),
                it=np.concatenate([saved['it'], self.sampler.saved_it[1:]]),
                nc=np.concatenate([saved['nc'], self.sampler.saved_nc[1:]]),
                boundidx=np.concatenate(
                    [saved['boundidx'], self.sampler.saved_boundidx[1:]]),
                bounditer=np.concatenate(
                    [saved['bounditer'], self.sampler.saved_bounditer[1:]]),
                scale=np.concatenate(
                    [saved['scale'], self.sampler.saved_scale[1:]]),
            )

        else:
            current_state = dict(
                unit_cube_samples=self.sampler.saved_u,
                physical_samples=self.sampler.saved_v,
                sample_likelihoods=self.sampler.saved_logl,
                sample_log_volume=self.sampler.saved_logvol,
                sample_log_weights=self.sampler.saved_logwt,
                cumulative_log_evidence=self.sampler.saved_logz,
                cumulative_log_evidence_error=self.sampler.saved_logzvar,
                cumulative_information=self.sampler.saved_h,
                id=self.sampler.saved_id,
                it=self.sampler.saved_it,
                nc=self.sampler.saved_nc,
                boundidx=self.sampler.saved_boundidx,
                bounditer=self.sampler.saved_bounditer,
                scale=self.sampler.saved_scale,
            )

        current_state.update(ncall=self.sampler.ncall,
                             live_logl=self.sampler.live_logl,
                             iteration=self.sampler.it - 1,
                             live_u=self.sampler.live_u,
                             live_v=self.sampler.live_v,
                             nlive=self.sampler.nlive,
                             live_bound=self.sampler.live_bound,
                             live_it=self.sampler.live_it,
                             added_live=self.sampler.added_live)

        weights = np.exp(current_state['sample_log_weights'] -
                         current_state['cumulative_log_evidence'][-1])
        current_state[
            'posterior'] = self.external_sampler.utils.resample_equal(
                np.array(current_state['physical_samples']), weights)

        save(resume_file, current_state)

        self.sampler.saved_id = [self.sampler.saved_id[-1]]
        self.sampler.saved_u = [self.sampler.saved_u[-1]]
        self.sampler.saved_v = [self.sampler.saved_v[-1]]
        self.sampler.saved_logl = [self.sampler.saved_logl[-1]]
        self.sampler.saved_logvol = [self.sampler.saved_logvol[-1]]
        self.sampler.saved_logwt = [self.sampler.saved_logwt[-1]]
        self.sampler.saved_logz = [self.sampler.saved_logz[-1]]
        self.sampler.saved_logzvar = [self.sampler.saved_logzvar[-1]]
        self.sampler.saved_h = [self.sampler.saved_h[-1]]
        self.sampler.saved_nc = [self.sampler.saved_nc[-1]]
        self.sampler.saved_boundidx = [self.sampler.saved_boundidx[-1]]
        self.sampler.saved_it = [self.sampler.saved_it[-1]]
        self.sampler.saved_bounditer = [self.sampler.saved_bounditer[-1]]
        self.sampler.saved_scale = [self.sampler.saved_scale[-1]]
Example #28
def main():
    import argparse
    parser = argparse.ArgumentParser(
            description=("Look inside HDF5 files. Works particularly well "
                         "for HDF5 files saved with deepdish.io.save()."),
            prog='ddls',
            epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+',
                        help='filename of HDF5 file')
    parser.add_argument('-d', '--depth', type=int, default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc', '--no-color', action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i', '--inspect', metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython', action='store_true',
                        help=('loads file into an IPython session.'
                              'Works with -i'))
    parser.add_argument('--raw', action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument('-f', '--filter', type=str,
                        help=('Print only entries that match this regular expression'))
    parser.add_argument('-l', '--leaves-only', action='store_true',
                        help=('Only print leaves'))
    parser.add_argument('-s', '--summarize', action='store_true',
                        help=('Print summary statistics of numpy arrays'))
    parser.add_argument('-c', '--compression', action='store_true',
                        help=('Print compression method for each array'))
    parser.add_argument('-v', '--version', action='version',
                        version='deepdish {} (io protocol {})'.format(__version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    settings = {}
    if args.filter:
        settings['filter'] = args.filter

    if args.leaves_only:
        settings['leaves-only'] = True

    if args.summarize:
        settings['summarize'] = True

    if args.compression:
        settings['compression'] = True

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc,
                paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc,
            paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)

        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            # State that will be incremented
            settings['filtered_count'] = 0

            s = get_tree(f, raw=args.raw, settings=settings)
            if s is not None:
                if i > 0:
                    print()

                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize, max_level=args.depth, settings=settings)
                i += 1

            if settings.get('filter'):
                print('Filtered on: {} ({} rows omitted)'.format(
                    paint(args.filter, 'purple', colorize=colorize),
                    paint(str(settings['filtered_count']), 'white', colorize=colorize)))
Example #29
def extract(filepath, keys):
    # with open(filepath, 'rb') as buf:
    #     d = io.load(buf)
    d = io.load(filepath)
    new_d = {k: v for k, v in d.items() if k in keys}
    return new_d
Example #30
 def get_n_vars(train_paths, iptagger):
     # with open(train_paths[0], 'rb') as buf:
     #     d = io.load(buf)
     d = io.load(train_paths[0])
     return np.concatenate([d['X'], d[iptagger + '_vars']], axis=1).shape[1]
Example #31
    def parse_wmap(self, key='warping'):
        """ Retrieve the parameters to construct the distortion correction function
        """

        self.warping = dio.load(self.kfile)[key]
Example #32
    def parse_Emap(self, key='coeffs'):
        """ Retrieve the parameters to construct the energy conversion function.
        """

        self.poly_a = dio.load(self.Efile)['calibration'][key]
Example #33
def loadParmSet(fname):
    from deepdish.io import load
    return load(fname)
Example #34
def main(MODEL_FILE):

    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']
    n_features = X_train.shape[2]    
    
    X_test = test_dict['X']
    y_test = test_dict['y']
    ip3d = test_dict['ip3d'] # this is a df

    print('Building model...')
    
    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)

        model = Sequential()

        model.add(graph)

        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):

        graph = build_graph_noCNN(n_features)
        
        model = Sequential()
        model.add(graph)

        model.add(Dense(64))
  
    model.add(Dropout(0.4))

    model.add(Highway(activation = 'relu'))

    model.add(Dropout(0.4)) #3
    model.add(Dense(4))

    model.add(Activation('softmax'))

    print('Compiling model...')
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print('Training:')
    try:
        model.fit(X_train, y_train, batch_size=512,
            callbacks = [
                EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                ModelCheckpoint(MODEL_FILE + '-progress', monitor='val_loss', verbose=True, save_best_only=True)
            ],
        nb_epoch=200, 
        validation_split = 0.2, 
        show_accuracy=True) 
        
    except KeyboardInterrupt:
        print('Training ended early.')

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')

    print('Saving weights...')
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5', overwrite=True)

    print('Testing...')
    yhat = model.predict(X_test, verbose=True, batch_size=512)

    print('Plotting ROC...')
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')
Example #35
def main():
    import argparse
    parser = argparse.ArgumentParser(
            description=("Look inside HDF5 files. Works particularly well "
                         "for HDF5 files saved with deepdish.io.save()."),
            prog='ddls',
            epilog='example: ddls test.h5 -i /foo/bar --ipython')
    parser.add_argument('file', nargs='+',
                        help='filename of HDF5 file')
    parser.add_argument('-d', '--depth', type=int, default=4,
                        help='max depth, defaults to 4')
    parser.add_argument('-nc', '--no-color', action='store_true',
                        help='turn off bash colors')
    parser.add_argument('-i', '--inspect', metavar='GRP',
                        help='prints a specific variable (e.g. /data)')
    parser.add_argument('--ipython', action='store_true',
                        help=('loads file into an IPython session.'
                              'Works with -i'))
    parser.add_argument('--raw', action='store_true',
                        help=('prints the raw HDF5 structure for complex '
                              'data types, such as sparse matrices and pandas '
                              'data frames'))
    parser.add_argument('-v', '--version', action='version',
                        version='deepdish {} (io protocol {})'.format(__version__, IO_VERSION))

    args = parser.parse_args()

    colorize = sys.stdout.isatty() and not args.no_color

    def single_file(files):
        if len(files) >= 2:
            s = 'Error: Select a single file when using --inspect'
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        return files[0]

    def run_ipython(fn, group=None, data=None):
        file_desc = paint(fn, 'yellow', colorize=colorize)
        if group is None:
            path_desc = file_desc
        else:
            path_desc = '{}:{}'.format(
                file_desc,
                paint(group, 'white', colorize=colorize))

        welcome = "Loaded {} into '{}':".format(
            path_desc,
            paint('data', 'blue', colorize=colorize))

        # Import deepdish for the session
        import deepdish as dd
        import IPython
        IPython.embed(header=welcome)

    i = 0
    if args.inspect is not None:
        fn = single_file(args.file)

        try:
            data = io.load(fn, args.inspect)
        except ValueError:
            s = 'Error: Could not find group: {}'.format(args.inspect)
            print(paint(s, 'red', colorize=colorize))
            sys.exit(1)
        if args.ipython:
            run_ipython(fn, group=args.inspect, data=data)
        else:
            print(data)
    elif args.ipython:
        fn = single_file(args.file)
        data = io.load(fn)
        run_ipython(fn, data=data)
    else:
        for f in args.file:
            s = get_tree(f, raw=args.raw)
            if s is not None:
                if i > 0:
                    print()

                if len(args.file) >= 2:
                    print(paint(f, 'yellow', colorize=colorize))
                s.print(colorize=colorize, max_level=args.depth)
                i += 1
Example #36
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import deepdish.io as ddio

from utils import *
from config import get_config

config = get_config()

classes = ['A', 'E', 'j', 'L', 'N', 'P', 'R', 'V']

input_shape = (config.input_size, 1)

input_train = ddio.load('dataset/traindata_tri.hdf5')
target_train = ddio.load('dataset/trainlabel_tri.hdf5')
input_test = ddio.load('dataset/testdata_tri.hdf5')
target_test = ddio.load('dataset/testlabel_tri.hdf5')

# Data & model configuration
batch_size = config.batch
no_epochs = config.ae_epochs
validation_split = 0.25
verbosity = 1
latent_dim = 2
num_channels = 1

# # =================
# # Encoder
# # =================
Example #37
 def get_n_vars(train_paths, iptagger):
     # with open(train_paths[0], 'rb') as buf:
     #     d = io.load(buf)
     d = io.load(train_paths[0])
     return np.concatenate([d['X'], d[iptagger + '_vars']], axis=1).shape[1]
Example #38
def main(embed_size, normed, input_id, run_name):

    configure_logging()
    logger = logging.getLogger("RNNIP Training")

    logger.info("Loading hdf5's")
    test_dict = io.load(os.path.join('data', 'test_dict_' + input_id + '.h5'))
    train_dict = io.load(os.path.join('data', 'train_dict_' + input_id + '.h5'))
    
    X_train_stream0 = train_dict['grade']
    X_train_stream1 = train_dict['X']
    y_train = train_dict['y']    

    X_test_stream0 = test_dict['grade']
    X_test_stream1 = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d'] 

    logger.info('Building model')
    model = build_model(X_train_stream0, X_train_stream1, embed_size, normed)
    model.summary()

    logger.info('Compiling model')
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    #-- if the pre-trained model exists, load it in, otherwise start from scratch
    safe_mkdir('weights')
    weights_file = os.path.join('weights', 'rnnip_' + run_name +'.h5')
    try:
        model.load_weights(weights_file)
        logger.info('Loaded pre-trained model from ' + weights_file)
    except IOError:
        logger.info('No pre-trained model found in ' + weights_file)

    logger.info('Training:')
    try:
        model.fit([X_train_stream0, X_train_stream1], y_train, batch_size=512,
            callbacks = [
                EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                ModelCheckpoint(
                    weights_file, 
                    monitor='val_loss', verbose=True, save_best_only=True
                )
            ],
        epochs=300, 
        validation_split = 0.2) 
        
    except KeyboardInterrupt:
        logger.info('Training ended early.')

    # -- load in best network
    logger.info('Loading best epoch')
    model.load_weights(weights_file)

    json_string = model.to_json()
    safe_mkdir('json_models')
    open(os.path.join('json_models', run_name +'.json'), 'w').write(json_string)

    logger.info('Testing')
    safe_mkdir('predictions')
    yhat = model.predict([X_test_stream0, X_test_stream1], verbose=True, batch_size=10000) 
    io.save(os.path.join('predictions', 'yhat'+ run_name +'.h5'), yhat) 
     
    logger.info('Plotting ROC')
    plot_ROC(y_test, yhat, ip3d, run_name)
Example #39
def extract(filepath, keys):
    # with open(filepath, 'rb') as buf:
    #     d = io.load(buf)
    d = io.load(filepath)
    new_d = {k: v for k, v in d.items() if k in keys}
    return new_d
Example #40
def main(MODEL_FILE):

    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']
    n_features = X_train.shape[2]    

    X_test = test_dict['X']
    y_test = test_dict['y']

    # this is a df
    ip3d = test_dict['ip3d'] 


    print('Building model...')
    
    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)

        model = Sequential()

        model.add(graph)
        # removing because of tensorflow
        #model.add(MaxoutDense(64, 5, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):

        model = Sequential()
        model.add(GRU(25, input_shape=(N_TRACKS, n_features))) #GRU
        model.add(Dropout(0.2))
    
        # removing because of tensorflow
        #model.add(MaxoutDense(64, 5))  #, input_shape=graph.nodes['dropout'].output_shape[1:]))
        model.add(Dense(64))

  
    model.add(Dropout(0.4))

    model.add(Highway(activation = 'relu'))

    model.add(Dropout(0.3))
    model.add(Dense(4))

    model.add(Activation('softmax'))

    print('Compiling model...')
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print('Training:')
    try:
        model.fit(X_train, y_train, batch_size=512,
            callbacks = [
                EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                ModelCheckpoint(MODEL_FILE + '-progress', monitor='val_loss', verbose=True, save_best_only=True)
            ],
        nb_epoch=2, 
        validation_split = 0.2, 
        show_accuracy=True) 
        
    except KeyboardInterrupt:
        print('Training ended early.')

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')
    

    print('Saving protobuf')
    # write out to a new directory called models
    # the actual graph file is graph.pb
    # the graph def is in the global session
    import tensorflow as tf
    import keras.backend.tensorflow_backend as tfbe

    sess = tfbe._SESSION

    saver = tf.train.Saver()
    tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False)    

    save_path = saver.save(sess, "./model-weights.ckpt")
    print "Model saved in file: %s" % save_path
    
    print saver.as_saver_def().filename_tensor_name
    print saver.as_saver_def().restore_op_name

    print model.get_output()
    print 'Saving weights...'
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5', overwrite=True)

    json_string = model.to_json()
    open(MODEL_FILE + '.json', 'w').write(json_string)

    print('Testing...')
    yhat = model.predict(X_test, verbose = True, batch_size = 512) 

    print('Plotting ROC...')
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    #plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')