params=params,
                  resample=cv,
                  structure="mask.nii",
                  beta_start="beta_start.npy",
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))
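    # Keys consumed by mapreduce.py (meanings inferred from usage in these
    # scripts): "map_output" is the per-job results directory,
    # "reduce_input"/"reduce_group_by" select and group job outputs for the
    # reduce step, and "reduce_output" is the final aggregated CSV.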

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map  %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="2500:00:00")


#############################################################################
def load_globals(config):
    import mapreduce as GLOBAL  # access to global variables
    GLOBAL.DATA = GLOBAL.load_data(config["data"])
    GLOBAL.A = LinearOperatorNesterov(
        filename=config["structure_linear_operator_tv"])
    GLOBAL.DIR = config["map_output"]
    GLOBAL.BETA_START = np.load(config["beta_start"])
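    # load_globals is the mapreduce.py user-function hook, presumably called
    # once per process: it caches the data, the Nesterov linear operator and
    # the warm-start betas on the shared GLOBAL module so individual jobs do
    # not reload them.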

    config = dict(data=dict(X=INPUT_DATA_X, y=INPUT_DATA_y),
                  params=params, resample=rndperm,
                  mask_filename=INPUT_MASK_PATH,
                  penalty_start=3,
                  map_output="rndperm",
                  user_func=user_func_filename,
                  #reduce_input="rndperm/*/*",
                  reduce_group_by="params",
                  reduce_output="ADAS11-MCIc-CTL_rndperm.csv")
    json.dump(config, open(os.path.join(WD, "config_rndperm.json"), "w"))

    #############################################################################
    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(WD)
    cmd = "mapreduce.py --map  %s/config_rndperm.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, suffix="_rndperm")
    #############################################################################
    # Sync to cluster
    print "Sync data to gabriel.intra.cea.fr: "
    os.system(sync_push_filename)
    #############################################################################
    print "# Start by running Locally with 2 cores, to check that everything os OK)"
    print "Interrupt after a while CTL-C"
    print "mapreduce.py --map %s/config_rndperm.json --ncore 2" % WD
    #os.system("mapreduce.py --mode map --config %s/config.json" % WD)
    print "# 1) Log on gabriel:"
    print 'ssh -t gabriel.intra.cea.fr'
    print "# 2) Run one Job to test"
    print "qsub -I"
Example #3
def init():
    INPUT_DATA_X = '/neurospin/brainomics/2017_asd_charles/Freesurfer/data/X.npy'
    INPUT_DATA_y = '/neurospin/brainomics/2017_asd_charles/Freesurfer/data/y.npy'
    INPUT_MASK_PATH = '/neurospin/brainomics/2017_asd_charles/Freesurfer/data/mask.npy'
    INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2017_asd_charles/Freesurfer/data/Atv.npz'

    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_X, WD)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_LINEAR_OPE_PATH, WD)

    ## Create config file
    y = np.load(INPUT_DATA_y)
    X = np.load(INPUT_DATA_X)

    cv_outer = [[tr, te] for tr, te in StratifiedKFold(
        y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]
    if cv_outer[0] is not None:  # Prepend a "null" resampling: all samples as both train and test
        null_resampling = [np.arange(0, len(y)), np.arange(0, len(y))]
        cv_outer.insert(0, null_resampling)
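    # This first resampling uses the full sample for both "train" and
    # "test"; it becomes the "refit/refit" entry below, i.e. a model
    # refitted on all data rather than a genuine cross-validation fold.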

    import collections
    cv = collections.OrderedDict()
    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        if cv_outer_i == 0:
            cv["refit/refit"] = [tr_val, te]
        else:
            cv["cv%02d/refit" % (cv_outer_i - 1)] = [tr_val, te]
            cv_inner = StratifiedKFold(y[tr_val].ravel(),
                                       n_folds=NFOLDS_INNER,
                                       random_state=42)
            for cv_inner_i, (tr, val) in enumerate(cv_inner):
                cv["cv%02d/cvnested%02d" %
                   ((cv_outer_i - 1), cv_inner_i)] = [tr_val[tr], tr_val[val]]
    for k in cv:
        cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]
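    # Resulting keys, assuming NFOLDS_OUTER = NFOLDS_INNER = 5: "refit/refit",
    # then "cv00/refit" .. "cv04/refit" for the outer folds, each with
    # inner keys "cvXX/cvnested00" .. "cvXX/cvnested04".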

    print(list(cv.keys()))

    tv_range = [0.0, .1, 0.2, 0.3, 0.4, 0.5, .6, 0.7, .8, 0.9, 1.0]
    ratios = np.array([[1., 0., 1], [0., 1., 1], [.5, .5, 1], [.1, .9, 1],
                       [0.9, 0.1, 1]])
    alphas = [.1, .01, 1.0]

    l1l2tv = [
        np.array([[float(1 - tv), float(1 - tv), tv]]) * ratios
        for tv in tv_range
    ]
    l1l2tv = np.concatenate(l1l2tv)
    alphal1l2tv = np.concatenate([
        np.c_[np.array([[alpha]] * l1l2tv.shape[0]), l1l2tv]
        for alpha in alphas
    ])
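    # Each row of alphal1l2tv is [alpha, l1, l2, tv]; by construction of
    # `ratios`, the three penalty weights sum to 1 in every row.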
    # remove duplicates
    alphal1l2tv = pd.DataFrame(alphal1l2tv)
    alphal1l2tv = alphal1l2tv[~alphal1l2tv.duplicated()]
    assert alphal1l2tv.shape == (153, 4)
    # Remove too-large l1 values that would lead to a null solution
    scaler = preprocessing.StandardScaler().fit(X)
    Xs = scaler.transform(X)
    l1max = utils.penalties.l1_max_logistic_loss(Xs[:, penalty_start:],
                                                 y,
                                                 mean=True,
                                                 class_weight="auto")
    #  0.11406144805642522
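    # l1_max_logistic_loss returns the smallest l1 penalty that drives all
    # coefficients to zero under the logistic loss, so rows whose
    # alpha * l1 exceeds l1max (null solutions) are dropped below.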
    alphal1l2tv = alphal1l2tv[alphal1l2tv[0] * alphal1l2tv[1] <= l1max]
    params = [np.round(row, 5).tolist() for row in alphal1l2tv.values.tolist()]
    assert pd.DataFrame(params).duplicated().sum() == 0
    assert len(params) == 127
    print("NB run=", len(params) * len(cv))
    # 4743 => 4216
    user_func_filename = "/home/ad247405/git/scripts/2017_asd_charles/03_enettv.py"

    config = dict(data=dict(X=os.path.basename(INPUT_DATA_X),
                            y=os.path.basename(INPUT_DATA_y)),
                  params=params,
                  resample=cv,
                  structure=os.path.basename(INPUT_MASK_PATH),
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map  %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="10000:00:00")
Example #4
    config_full_filename = os.path.join(full_output_dir, filename)
    json.dump(config, open(config_full_filename, "w"))
    return config

#################
# Actual script #
#################

if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # Retrieve variables
    X = np.load(INPUT_DATA_X)
    y = np.ones(X.shape[0])
    shutil.copy(INPUT_DATA_X, os.path.join(OUTPUT_DIR,"5_folds_all30yo_scz"))
    shutil.copy(INPUT_MASK, os.path.join(OUTPUT_DIR,"5_folds_all30yo_scz"))

    #############################################################################
    # Create config files
    config_5folds = create_config(y, *(CONFIGS[0]))

    DEBUG = False
    if DEBUG:
        run_test(OUTPUT_DIR, config_5folds)
        
    # Build utils files: sync (push/pull) and PBS

    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(os.path.join(OUTPUT_DIR,"5_folds_all30yo_scz"))
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(os.path.join(OUTPUT_DIR, "5_folds_all30yo_scz"),
                                            cmd, walltime="250:00:00")
Example #5
    config = dict(data=dict(X='X.npy', z='z.npy'),
                  params=params, resample=cv,
                  structure="",
                  map_output="results",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="results/.*/(.*)",
                  reduce_output="results-2.csv")
    json.dump(config, open(os.path.join(WD, "config.json"), "w"))

    #########################################################################
    # Build utils files: sync (push/pull) and PBS
    sys.path.append(os.path.join(os.getenv('HOME'), 'gits', 'scripts'))
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(WD, user="******")
    cmd = "mapreduce.py -m %s/config.json  --ncore 12" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd)
    #########################################################################
    # Synchronize to cluster
    print "Sync data to gabriel.intra.cea.fr: "
    os.system(sync_push_filename)
    #########################################################################
    print "# Map"
    print "mapreduce.py -m %s/config.json --ncore 12" % WD
    #os.system("mapreduce.py --mode map --config %s/config.json" % WD)
    print "# Run on cluster Gabriel"
    print "qsub job_Global_long.pbs"
    #########################################################################
    print "# Reduce"
    print "mapreduce.py -r %s/config.json" % WD_CLUSTER
Example #6
#################
# Actual script #
#################

if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # Retrieve variables
    X = np.load(INPUT_DATA_X)
    y = np.ones(X.shape[0])
    shutil.copy(INPUT_DATA_X,
                os.path.join(OUTPUT_DIR, "vbm_pcatv_all+VIP_controls_30yo"))
    shutil.copy(INPUT_MASK,
                os.path.join(OUTPUT_DIR, "vbm_pcatv_all+VIP_controls_30yo"))

    #############################################################################
    # Create config files
    config_5folds = create_config(y, *(CONFIGS[0]))

    DEBUG = False
    if DEBUG:
        run_test(OUTPUT_DIR, config_5folds)

    # Build utils files: sync (push/pull) and PBS

    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(os.path.join(OUTPUT_DIR,"vbm_pcatv_all+VIP_controls_30yo"))
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(os.path.join(
        OUTPUT_DIR, "vbm_pcatv_all+VIP_controls_30yo"),
                                            cmd,
                                            walltime="250:00:00")
Example #7
def init():
    INPUT_DATA_X = os.path.join('X.npy')
    INPUT_DATA_y = os.path.join('y.npy')
    INPUT_MASK_PATH = os.path.join("mask.nii")
    NFOLDS_INNER, NFOLDS_OUTER = 5, 5
    #WD = os.path.join(WD, 'logistictvenet_5cv')
    if not os.path.exists(WD):
        os.makedirs(WD)

    os.chdir(WD)

    #############################################################################
    ## Create config file
    y = np.load(INPUT_DATA_y)
    X = np.load(INPUT_DATA_X)
    from parsimony.utils.penalties import l1_max_logistic_loss
    assert np.allclose(l1_max_logistic_loss(X[:, 2:], y), 0.18046445850741652)
    if os.path.exists(config_filenane()):
        old_conf = json.load(open(config_filenane()))
        cv = old_conf["resample"]
    else:
        cv_outer = [[tr, te] for tr, te in StratifiedKFold(
            y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]
        """
        cv_outer = [[np.array(tr), np.array(te)] for tr,te in json.load(open("/neurospin/brainomics/2013_adni/MCIc-CTL_cs_s/config.json", "r"))["resample"][1:]]
        """
        import collections
        cv = collections.OrderedDict()
        for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
            cv["cv%02d/refit" % cv_outer_i] = [tr_val, te]
            cv_inner = StratifiedKFold(y[tr_val].ravel(),
                                       n_folds=NFOLDS_INNER,
                                       random_state=42)
            for cv_inner_i, (tr, val) in enumerate(cv_inner):
                cv["cv%02d/cvnested%02d" %
                   (cv_outer_i, cv_inner_i)] = [tr_val[tr], tr_val[val]]
        for k in cv:
            cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]

    print cv.keys()
    # Some QC
    N = float(len(y))
    p0 = np.sum(y == 0) / N
    p1 = np.sum(y == 1) / N
    for k in cv:
        tr, val = cv[k]
        tr, val = np.array(tr), np.array(val)
        print k, "\t: tr+val=", len(tr) + len(val)
        assert not set(tr).intersection(val)
        assert abs(np.sum(y[tr] == 0) / float(len(y[tr])) - p0) < 0.01
        assert abs(np.sum(y[tr] == 1) / float(len(y[tr])) - p1) < 0.01
        if k.count("refit"):
            te = val
            assert len(tr) + len(te) == len(y)
            assert abs(len(y[tr]) / N - (1 - 1. / NFOLDS_OUTER)) < 0.01
        else:
            te = np.array(cv[k.split("/")[0] + "/refit"])[1]
            assert abs(
                len(y[tr]) / N - (1 - 1. / NFOLDS_OUTER) *
                (1 - 1. / NFOLDS_INNER)) < 0.01
            assert not set(tr).intersection(te)
            assert not set(val).intersection(te)
            assert len(tr) + len(val) + len(te) == len(y)

    tv_ratios = [0., .2, .8]
    l1_ratios = [np.array([1., .1, .9, 1]),
                 np.array([1., .9, .1, 1])]  # [alpha, l1 l2 tv]
    alphas_l1l2tv = [.01, .1]
    alphas_l2tv = [round(alpha, 10) for alpha in 10.**np.arange(-2, 4)]
    k_range = [-1]
    l1l2tv = [
        np.array([alpha, float(1 - tv),
                  float(1 - tv), tv]) * l1_ratio for alpha in alphas_l1l2tv
        for tv in tv_ratios for l1_ratio in l1_ratios
    ]
    # specific case for without l1 since it supports larger penalties
    l2tv = [
        np.array([alpha, 0., float(1 - tv), tv]) for alpha in alphas_l2tv
        for tv in tv_ratios
    ]
    params = l1l2tv + l2tv
    params = [param.tolist() + [k] for k in k_range for param in params]
    params = {"_".join([str(p) for p in param]): param for param in params}
    #assert len(params) == 30
    user_func_filename = os.path.join(os.environ["HOME"], "git", "scripts",
                                      "2013_adni", "MCIc-CTL",
                                      "02_tvenet_modselectcv_cs_s.py")
    #print __file__, os.path.abspath(__file__)
    print "user_func", user_func_filename
    #import sys
    #sys.exit(0)
    # Use relative path from config.json
    config = dict(
        data=dict(X=INPUT_DATA_X, y=INPUT_DATA_y),
        params=params,
        resample=cv,
        mask_filename=INPUT_MASK_PATH,
        penalty_start=2,
        map_output="modselectcv",
        user_func=user_func_filename,
        #reduce_input="rndperm/*/*",
        reduce_group_by="user_defined",
        reduce_output="MCIc-CTL_cs_s_modselectcv.csv")
    json.dump(config, open(os.path.join(WD, "config_modselectcv.json"), "w"))

    #############################################################################
    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(WD)
    cmd = "mapreduce.py --map  %s/config_modselectcv.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd)
    #############################################################################
    # Sync to cluster
    print "Sync data to gabriel.intra.cea.fr: "
    os.system(sync_push_filename)
    #############################################################################
    print "# Start by running Locally with 2 cores, to check that everything os OK)"
    print "Interrupt after a while CTL-C"
    print "mapreduce.py --map %s/config_modselectcv.json --ncore 2" % WD
    #os.system("mapreduce.py --mode map --config %s/config.json" % WD)
    print "# 1) Log on gabriel:"
    print 'ssh -t gabriel.intra.cea.fr'
    print "# 2) Run one Job to test"
    print "qsub -I"
    print "cd %s" % WD_CLUSTER
    print "./job_Global_long.pbs"
    print "# 3) Run on cluster"
    print "qsub job_Global_long.pbs"
    print "# 4) Log out and pull Pull"
    print "exit"
    print sync_pull_filename
    #############################################################################
    print "# Reduce"
    print "mapreduce.py --reduce %s/config_modselectcv.json" % WD
Example #8
def init():
    INPUT_DATA_X = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/X.npy'
    INPUT_DATA_y = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/y.npy'
    INPUT_MASK_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/mask.nii'
    INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/Atv.npz'
    INPUT_CSV = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/population_30yo.csv'

    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_X, WD)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_LINEAR_OPE_PATH, WD)

    ## Create config file
    y = np.load(INPUT_DATA_y)
    X = np.load(INPUT_DATA_X)

    cv_outer = [[tr, te] for tr, te in StratifiedKFold(
        y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]
    if cv_outer[0] is not None:  # Prepend a "null" resampling: all samples as both train and test
        null_resampling = [np.arange(0, len(y)), np.arange(0, len(y))]
        cv_outer.insert(0, null_resampling)

    import collections
    cv = collections.OrderedDict()
    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        if cv_outer_i == 0:
            cv["refit/refit"] = [tr_val, te]
        else:
            cv["cv%02d/refit" % (cv_outer_i - 1)] = [tr_val, te]
            cv_inner = StratifiedKFold(y[tr_val].ravel(),
                                       n_folds=NFOLDS_INNER,
                                       random_state=42)
            for cv_inner_i, (tr, val) in enumerate(cv_inner):
                cv["cv%02d/cvnested%02d" %
                   ((cv_outer_i - 1), cv_inner_i)] = [tr_val[tr], tr_val[val]]
    for k in cv:
        cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]

    print(list(cv.keys()))

    # Grid from the OLS paper
    gn_range = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    # ratios = np.array([[1., 0., 1], [0., 1., 1], [.5, .5, 1], [.1, .9, 1], [0.9, 0.1, 1]])
    # gn_range = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
    # gn_range = [0.0, 0.2, 0.8, 1.0]
    ratios = np.array([[1., 0., 1], [0., 1., 1], [.5, .5, 1], [.1, .90, 1],
                       [0.9, 0.1, 1], [0.2, 0.8, 1], [0.3, 0.7, 1]])
    # ratios = np.array([[1., 0., 1], [0., 1., 1], [.5, .5, 1], [.1, .9, 1], [0.9, 0.1, 1]])
    alphas = [.1, .01, 1.0]

    l1l2s = [
        np.array([[float(1 - gn), float(1 - gn), gn]]) * ratios
        for gn in gn_range
    ]
    l1l2s = np.concatenate(l1l2s)
    alphal1l2s = np.concatenate([
        np.c_[np.array([[alpha]] * l1l2s.shape[0]), l1l2s] for alpha in alphas
    ])
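    # Each row of alphal1l2s is [alpha, l1, l2, gn]; "gn" is presumably the
    # GraphNet penalty weight (the user_func is an enetgn script), and as
    # above the three penalty weights sum to 1 in every row.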

    params = [np.round(param, 2).tolist() for param in alphal1l2s]
    print("NB run=", len(params) * len(cv))
    user_func_filename = "/home/ed203246/git/scripts/2016_schizConnect/supervised_analysis/NUSDAST/VBM/30yo_scripts/02_enetgn_NUDAST.py"

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params,
                  resample=cv,
                  structure="mask.nii",
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map  %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="250:00:00")
Example #9
#################
# Actual script #
#################

if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # Retrieve variables
    X = np.load(INPUT_DATA_X)
    y = np.ones(X.shape[0])
    shutil.copy(INPUT_DATA_X,
                os.path.join(OUTPUT_DIR, "FS_pcatv_NMoprhCH_controls"))
    shutil.copy(INPUT_MASK,
                os.path.join(OUTPUT_DIR, "FS_pcatv_NMoprhCH_controls"))

    #############################################################################
    # Create config files
    config_5folds = create_config(y, *(CONFIGS[0]))

    DEBUG = False
    if DEBUG:
        run_test(OUTPUT_DIR, config_5folds)

    # Build utils files: sync (push/pull) and PBS

    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(os.path.join(OUTPUT_DIR,"FS_pcatv_NMoprhCH_controls"))
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(os.path.join(
        OUTPUT_DIR, "FS_pcatv_NMoprhCH_controls"),
                                            cmd,
                                            walltime="250:00:00")
Example #10
            os.makedirs(output_dir)

        # Copy the learning data
        src_datafile = os.path.join(input_dir, INPUT_STD_DATASET_FILE)
        shutil.copy(src_datafile, output_dir)

        # Copy the objects masks
        for i in range(N_COMP):
            filename = INPUT_OBJECT_MASK_FILE_FORMAT.format(o=i)
            src_filename = os.path.join(INPUT_MASK_DIR, filename)
            dst_filename = os.path.join(output_dir, filename)
            shutil.copy(src_filename, dst_filename)

        # Create files to synchronize with the cluster
        sync_push_filename, sync_pull_filename, CLUSTER_WD = \
            clust_utils.gabriel_make_sync_data_files(output_dir,
                                                     user="******")

        # Create config file
        user_func_filename = os.path.abspath(__file__)

        config = OrderedDict([('data', dict(X=INPUT_STD_DATASET_FILE)),
                              ('im_shape', dice5_data.SHAPE),
                              ('params', correct_params), ('l1_max', l1_max),
                              ('n_comp', N_COMP), ('resample', resamplings),
                              ('map_output', "results"),
                              ('user_func', user_func_filename), ('ncore', 4),
                              ('reduce_group_by', "params"),
                              ('reduce_output', "results.csv")])
        config_full_filename = os.path.join(output_dir, "config.json")
        json.dump(config, open(config_full_filename, "w"), **JSON_DUMP_OPT)
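        # An OrderedDict keeps the keys in this order in the dumped JSON;
        # JSON_DUMP_OPT (defined elsewhere) presumably holds formatting
        # options such as an indent level.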
Example #11
def init():
    INPUT_DATA_X = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/X.npy'
    # remove covariates from data

    INPUT_DATA_y = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/y.npy'
    INPUT_MASK_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/mask.nii'
    INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/Atv.npz'
    NFOLDS_OUTER = 5
    NFOLDS_INNER = 5
    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_LINEAR_OPE_PATH, WD)
    X = np.load(INPUT_DATA_X)
    np.save(os.path.join(WD, "X.npy"), X[:, penalty_start:])

    #start_vector=weights.RandomUniformWeights(normalise=True,seed= 40004)
    #np.save(os.path.join(WD,"start_vector.npy"),start_vector)

    y = np.load(INPUT_DATA_y)

    cv_outer = [[tr, te] for tr, te in StratifiedKFold(
        y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]
    if cv_outer[0] is not None:  # Prepend a "null" resampling: all samples as both train and test
        null_resampling = [np.arange(0, len(y)), np.arange(0, len(y))]
        cv_outer.insert(0, null_resampling)

#
    import collections
    cv = collections.OrderedDict()
    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        if cv_outer_i == 0:
            cv["all/all"] = [tr_val, te]

        else:
            cv["cv%02d/all" % (cv_outer_i - 1)] = [tr_val, te]
            cv_inner = StratifiedKFold(y[tr_val].ravel(),
                                       n_folds=NFOLDS_INNER,
                                       random_state=42)
            for cv_inner_i, (tr, val) in enumerate(cv_inner):
                cv["cv%02d/cvnested%02d" %
                   ((cv_outer_i - 1), cv_inner_i)] = [tr_val[tr], tr_val[val]]
    for k in cv:
        cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]

    print(list(cv.keys()))

    params = [[0.01, 0.72, 0.08, 0.2], [0.01, 0.08, 0.72, 0.2],
              [0.01, 0.18, 0.02, 0.8], [0.1, 0.18, 0.02, 0.8],
              [0.1, 0.02, 0.18, 0.8], [0.01, 0.02, 0.18, 0.8],
              [0.1, 0.08, 0.72, 0.2], [0.1, 0.72, 0.08, 0.2]]
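    # Each row is [alpha, l1, l2, tv]; the three penalty weights sum to 1
    # in every row (e.g. 0.72 + 0.08 + 0.2 == 1).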

    assert len(params) == 8

    user_func_filename = "/home/ad247405/git/scripts/2017_parsimony_settings/warm_start/no_covariates/random_start/no_warm_restart_NUDAST_30yo_VBM.py"

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params,
                  resample=cv,
                  structure="mask.nii",
                  start_vector=dict(start_vector="start_vector.npy"),
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map  %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="2500:00:00")
Example #12
def init():
    INPUT_DATA_X = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/X.npy'
    INPUT_DATA_y = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/y.npy'
    INPUT_MASK_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/mask.nii'
    INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/data/data_30yo/Atv.npz'
    NFOLDS_OUTER = 5
    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_X, WD)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_LINEAR_OPE_PATH, WD)

    if not os.path.exists(os.path.join(WD, "beta_start.npz")):
        betas = dict()
        BETA_START_PATH = "/neurospin/brainomics/2017_parsimony_settings/warm_restart/NUSDAST_30yo/VBM/no_warm_restart/model_selectionCV/cv00/all"
        params = glob.glob(os.path.join(BETA_START_PATH, "0*"))
        for p in params:
            print(p)
            path = os.path.join(p, "beta.npz")
            beta = np.load(path)
            betas[os.path.basename(p)] = beta['arr_0']

        np.savez(os.path.join(WD, "beta_start.npz"), **betas)
        beta_start = np.load(os.path.join(WD, "beta_start.npz"))
        assert np.all(
            [np.all(beta_start[a] == betas[a]) for a in beta_start.keys()])
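        # beta_start.npz maps each parameter-directory name to the beta
        # estimated without warm restart; the config below passes it as
        # "beta_start" so those coefficients seed the warm-started fits.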

    y = np.load(INPUT_DATA_y)

    cv_outer = [[tr, te] for tr, te in StratifiedKFold(
        y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]

    #
    import collections
    cv = collections.OrderedDict()
    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        cv["cv%02d/all" % (cv_outer_i)] = [tr_val, te]

    for k in cv:
        cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]

    print(list(cv.keys()))

    params = [[0.01, 0.72, 0.08, 0.2], [0.01, 0.08, 0.72, 0.2],
              [0.01, 0.18, 0.02, 0.8], [0.1, 0.18, 0.02, 0.8],
              [0.1, 0.02, 0.18, 0.8], [0.01, 0.02, 0.18, 0.8],
              [0.1, 0.08, 0.72, 0.2], [0.1, 0.72, 0.08, 0.2]]

    assert len(params) == 8

    user_func_filename = "/home/ad247405/git/scripts/2017_parsimony_settings/warm_restart/NUDAST_30yo_VBM_cv00_all_as_start_vector.py"

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params,
                  resample=cv,
                  structure="mask.nii",
                  beta_start="beta_start.npz",
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map  %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="2500:00:00")
Example #13
#################
# Actual script #
#################

if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # Retrieve variables
    X = np.load(INPUT_DATA_X)
    y = np.ones(X.shape[0])
    shutil.copy(INPUT_DATA_X, os.path.join(OUTPUT_DIR, "5_folds_NMoprhCH_all"))
    shutil.copy(INPUT_MASK, os.path.join(OUTPUT_DIR, "5_folds_NMoprhCH_all"))

    #############################################################################
    # Create config files
    config_5folds = create_config(y, *(CONFIGS[0]))

    DEBUG = False
    if DEBUG:
        run_test(OUTPUT_DIR, config_5folds)

    # Build utils files: sync (push/pull) and PBS

    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(os.path.join(OUTPUT_DIR,"5_folds_NMoprhCH_all"))
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(os.path.join(
        OUTPUT_DIR, "5_folds_NMoprhCH_all"),
                                            cmd,
                                            walltime="250:00:00")
Example #14
#################
# Actual script #
#################

if __name__ == "__main__":
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # Retrieve variables
    X = np.load(INPUT_DATA_X)
    y = np.ones(X.shape[0])
    shutil.copy(INPUT_DATA_X, os.path.join(OUTPUT_DIR,
                                           "5_folds_NUDAST_10comp"))
    shutil.copy(INPUT_MASK, os.path.join(OUTPUT_DIR, "5_folds_NUDAST_10comp"))

    #############################################################################
    # Create config files
    config_5folds = create_config(y, *(CONFIGS[0]))

    DEBUG = False
    if DEBUG:
        run_test(OUTPUT_DIR, config_5folds)

    # Build utils files: sync (push/pull) and PBS

    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(os.path.join(OUTPUT_DIR,"5_folds_NUDAST_10comp"))
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(os.path.join(
        OUTPUT_DIR, "5_folds_NUDAST_10comp"),
                                            cmd,
                                            walltime="250:00:00")
Example #15
def init():
    INPUT_DATA_X = os.path.join('X.npy')
    INPUT_DATA_y = os.path.join('y.npy')
    INPUT_MASK_PATH = os.path.join("mask.nii")
    #WD = os.path.join(WD, 'logistictvenet_5cv')
    if not os.path.exists(WD):
        os.makedirs(WD)

    os.chdir(WD)

    #############################################################################
    ## Create config file
    y = np.load(INPUT_DATA_y)
    if os.path.exists("config.json"):
        inf = open("config.json", "r")
        old_conf = json.load(inf)
        cv = old_conf["resample"]
        inf.close()
    else:
        cv = [[tr.tolist(), te.tolist()]
              for tr, te in StratifiedKFold(y.ravel(), n_folds=NFOLDS_OUTER)]
    if cv[0] is not None:  # Prepend a None resampling (refit fold) at position 0
        cv.insert(0, None)
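    # A leading None resampling presumably tells mapreduce.py to refit on
    # the full dataset before the genuine CV folds.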
    # parameters grid
    # Re-run with
    tv_range = np.hstack([np.arange(0, 1., .1), [0.05, 0.01, 0.005, 0.001]])
    ratios = np.array([[1., 0., 1], [0., 1., 1], [.5, .5, 1], [.9, .1, 1],
                       [.1, .9, 1], [.01, .99, 1], [.001, .999, 1]])
    alphas = [.01, .05, .1, .5, 1.]
    k_range = [100, 1000, 10000, 100000, -1]
    l1l2tv = [
        np.array([[float(1 - tv), float(1 - tv), tv]]) * ratios
        for tv in tv_range
    ]
    l1l2tv.append(np.array([[0., 0., 1.]]))
    l1l2tv = np.concatenate(l1l2tv)
    alphal1l2tv = np.concatenate([
        np.c_[np.array([[alpha]] * l1l2tv.shape[0]), l1l2tv]
        for alpha in alphas
    ])
    alphal1l2tvk = np.concatenate([
        np.c_[alphal1l2tv, np.array([[k]] * alphal1l2tv.shape[0])]
        for k in k_range
    ])
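    # Each row of alphal1l2tvk is [alpha, l1, l2, tv, k], where k is a
    # feature-filtering parameter (-1 presumably meaning all features).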
    params = [param.tolist() for param in alphal1l2tvk]
    """
    inf = open("config.json", "w")
    old_conf = json.load(inf)
    params = old_conf["params"]
    params.append([.1, .05, .6, .35, -1.0])
    params.append([.05, .05, .6, .35, -1.0])
    params.append([.01, .05, .6, .35, -1.0])

    """
    # User map/reduce function file:
    #    try:
    #        user_func_filename = os.path.abspath(__file__)
    #    except:
    user_func_filename = os.path.join(os.environ["HOME"], "git", "scripts",
                                      "2013_adni", "MCIc-CTL",
                                      "02_tvenet_csi.py")
    #print __file__, os.path.abspath(__file__)
    print "user_func", user_func_filename
    #import sys
    #sys.exit(0)
    # Use relative path from config.json
    config = dict(
        data=dict(X=INPUT_DATA_X, y=INPUT_DATA_y),
        params=params,
        resample=cv,
        mask_filename=INPUT_MASK_PATH,
        penalty_start=3,
        map_output="5cv",
        user_func=user_func_filename,
        #reduce_input="rndperm/*/*",
        reduce_group_by="params",
        reduce_output=os.path.basename(results_filenane()).replace(
            "xlsx", "csv"))
    json.dump(config, open(os.path.join(WD, "config.json"), "w"))

    #############################################################################
    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, WD_CLUSTER = \
        clust_utils.gabriel_make_sync_data_files(WD)
    cmd = "mapreduce.py --map  %s/config.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd)
    #############################################################################
    # Sync to cluster
    print "Sync data to gabriel.intra.cea.fr: "
    os.system(sync_push_filename)
    #############################################################################
    print "# Start by running Locally with 2 cores, to check that everything os OK)"
    print "Interrupt after a while CTL-C"
    print "mapreduce.py --map %s/config.json --ncore 2" % WD
    #os.system("mapreduce.py --mode map --config %s/config.json" % WD)
    print "# 1) Log on gabriel:"
    print 'ssh -t gabriel.intra.cea.fr'
    print "# 2) Run one Job to test"
    print "qsub -I"
    print "cd %s" % WD_CLUSTER
    print "./job_Global_long.pbs"
    print "# 3) Run on cluster"
    print "qsub job_Global_long.pbs"
    print "# 4) Log out and pull Pull"
    print "exit"
    print sync_pull_filename
    #############################################################################
    print "# Reduce"
    print "mapreduce.py --reduce %s/config.json" % WD