def generate_all_data(random_model_repeat=c.random_model_repeat):
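    """Fork one modeling process per random model, then merge the results.

    Workers are spread over GPUs 1-3 (GPU 0 is kept free for the network)
    and throttled so that no more than about 100 run at once; the per-worker
    RSF outputs are concatenated with sfcat when all of them finish.
    """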

    K.clear_session()
    start_modeling_time = time.time()
    procs = []
    for iShotBlock in range(random_model_repeat):
        # we run modeling on GPUs 1-3; GPU 0 is reserved for the network
        os.environ["CUDA_VISIBLE_DEVICES"] = str((iShotBlock % 3) + 1)
        proc = multiprocessing.Process(target=generate_rsf_data_multi,
                                       kwargs={'iShotBlock': iShotBlock})
        proc.start()
        procs.append(proc)
        if len(procs) > 100:
            for proc in procs[:50]:
                proc.join()
            procs = procs[50:]
    for proc in procs:
        proc.join()
    print(f"Time for modeling = {toc(start_modeling_time)}")

    start_merging_rsf_time = time.time()
    cmd(f"sfcat data_*/shots_cmp.rsf axis=4 > shots_cmp_full.rsf")
    cmd(f"sfcat data_*/logs.rsf axis=3 > logs_full.rsf")

    print(f"Time for merging rsf files = {toc(start_merging_rsf_time)}")
    os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
def train_model(prefix="multi",
                X_scaled=X_scaled_multi,
                T_scaled=T_scaled_multi,
                weights=None):
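    """Compile the network and train it on generated data.

    If `weights` is given, training warm-starts from those weights at a
    reduced learning rate. Batches come from batch_generator(); the best
    model by validation loss is checkpointed to "trained_net".
    """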
    cmd("rm new_data_ready")
    #cmd("ssh 10.109.66.7 'rm ~/log_estimation/data/new_data_ready'")
    lr_start = 0.001
    if weights is not None:
        # warm-starting from existing weights: use a much smaller learning rate
        lr_start = 1e-5
    net = create_model(np.shape(X_scaled)[1:], np.shape(T_scaled)[1:])
    net.compile(loss=tv_loss,
                optimizer=keras.optimizers.Nadam(lr_start),
                metrics=[R2])

    #net.summary()
    if weights is not None:
        net.load_weights(weights)

    early_stopping = EarlyStopping(monitor='val_loss', patience=21)
    model_checkpoint = ModelCheckpoint("trained_net",
                                       monitor='val_loss',
                                       save_best_only=True,
                                       verbose=1,
                                       period=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=7,
                                  min_lr=1e-5,
                                  verbose=1)

    # NB: in this snippet validation reuses the training arrays
    X_valid = X_scaled
    T_valid = T_scaled
    print(f"X validation data size = {np.shape(X_valid)}")

    # TRAINING
    batch_size = 128
    # every batch is also flipped, so going through the whole data set takes twice as many batches
    steps_per_epoch = len(X_scaled) // batch_size
    print(f"Batch size = {batch_size}, batches per epoch = {steps_per_epoch}")
    history = net.fit_generator(
        batch_generator(X_scaled, T_scaled, batch_size=batch_size),
        validation_data=(X_valid, T_valid),
        epochs=100,
        verbose=2,
        shuffle=True,
        max_queue_size=200,
        workers=10,
        use_multiprocessing=False,
        steps_per_epoch=steps_per_epoch,
        callbacks=[model_checkpoint, reduce_lr, early_stopping])

    print("Optimization Finished!")

    return net, history
def generate_rsf_data_multi(model_name="marm.rsf",
                            central_freq=c.central_freq,
                            dt=c.dt,
                            nt=c.nt,
                            sxbeg=c.sxbeg,
                            gxbeg=c.gxbeg,
                            szbeg=c.szbeg,
                            jsx=c.jsx,
                            jgx=c.jgx,
                            jdt=c.jdt,
                            logs_out="logs.rsf",
                            shots_out="shots_cmp.rsf",
                            iShotBlock=None):
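    """Worker entry point for one random model.

    Sets up a per-process DATAPATH in shared memory, generates a randomized
    velocity model from the training model, and runs the forward modeling
    inside data_{iShotBlock}/.
    """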
    cmd(f"mkdir /dev/shm/RSFTMP/data_{iShotBlock}")
    cmd(f"chmod 777 /dev/shm/RSFTMP/data_{iShotBlock}")
    os.environ["DATAPATH"] = f"/dev/shm/RSFTMP/data_{iShotBlock}"
    cmd(f"echo $DATAPATH")
    cmd(f"mkdir data_{iShotBlock}")
    seed()
    #cmd(f"sfwindow < overthrust3D.hh n3=120 f1={iShotBlock*randint(0,1e7) % 400} n1=1 | sftransp | sfadd scale=1000 | sfput d1=25 d2=25 --out=stdout > data_{iShotBlock}/overthrust2D.hh")
    #cmd(f"cp {c.trmodel} data_{iShotBlock}/")
    with cd(f"data_{iShotBlock}"):
        _vel = generate_model(model_input=f"../{c.trmodel}",
                              random_state_number=(iShotBlock +
                                                   randint(0, 1e7)))
        #plt_nb_T(_vel)
        generate_rsf_data()
def batch_generator(X, T, T_scaler=T_scaler, batch_size=None):
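    """Yield (X, T) training batches forever, hot-swapping the data set.

    The sentinel file "new_data_ready" signals that freshly merged RSF data
    is available: the generator waits for it on startup, reloads the arrays
    whenever it reappears, and otherwise keeps reshuffling the old data.
    """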
    batch = []
    print("generator (re)started, waiting for new data")
    while not os.path.exists("new_data_ready"):
        time.sleep(1)
    while True:
        # it might be a good idea to shuffle your data before each epoch
        # for iData in range(40):
        #     print(f"loading NEW DATA {iData}")
        #     X_rsf, T_rsf = read_rsf_XT(shots_rsf=f'/data/ibex_data/fullCMP_{iData}/shots_cmp_full.hh',
        #                                logs_rsf=f'/data/ibex_data/fullCMP_{iData}/logs_cmp_full.hh')
        #     X, T = prepare_XT(X_rsf, T_rsf, T_scaler)

        #     indices = np.arange(len(X))
        #     np.random.shuffle(indices)
        #     for i in indices:
        #         # if os.path.exists("new_data_ready"):
        #         #     break
        #         batch.append(i)
        #         if len(batch)==batch_size:
        #             yield X[batch], T[batch]
        #             batch=[]
        if os.path.exists("new_data_ready"):
            cmd("rm new_data_ready")
            X_rsf, T_rsf = read_rsf_XT(shots_rsf='shots_cmp_full.rsf',
                                       logs_rsf='logs_full.rsf')
            #cmd("ssh glogin.ibex.kaust.edu.sa 'rm ~/log_estimation/data/new_data_ready'")
            X, T = prepare_XT(X_rsf, T_rsf, T_scaler)
            print("new data loaded")
        else:
            print("reusing the old data")
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        print("indices reshuffled")
        for i in indices:
            if os.path.exists("new_data_ready"):
                break
            batch.append(i)
            if len(batch) == batch_size:
                yield X[batch], T[batch]
                batch = []
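
# A minimal sketch of the producer side of the "new_data_ready" handshake;
# none of these snippets show who creates the sentinel, so the `touch` below
# is a hypothetical placement (assumed to run after the sfcat merge finishes):
#
#     generate_all_data()
#     cmd("touch new_data_ready")  # hypothetical: lets batch_generator reload
#
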
def generate_rsf_data(model_name="marm.rsf",
                      central_freq=c.central_freq,
                      dt=c.dt,
                      dx=c.dx,
                      nt=c.nt,
                      sxbeg=c.sxbeg,
                      gxbeg=c.gxbeg,
                      szbeg=c.szbeg,
                      jsx=c.jsx,
                      jgx=c.jgx,
                      jdt=c.jdt,
                      logs_out="logs.rsf",
                      shots_out="shots_cmp.rsf",
                      full_shots_out=None):
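    """Model, filter, and sort the seismic data for one velocity model.

    Runs sfgenshots for all shot positions, decimates the traces in time and
    band-passes them, sorts the shot gathers into CMP gathers, and windows a
    smoothed version of the model into the well logs used as training targets.
    """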

    # get the size of the model
    model_orig = sf.Input(model_name)
    Nx = model_orig.int("n2")
    print(Nx)
    ns = (Nx - 2 * sxbeg) // jgx
    ng = 2 * (sxbeg - gxbeg) // jgx + 1
    print(f"Total number of shots = {ns}")
    t_start = time.time()
    cmd((
        f"sfgenshots < {model_name} csdgather=y fm={central_freq} amp=1 dt={dt} ns={ns} ng={ng} nt={nt} "
        f"sxbeg={sxbeg} chk=n szbeg={szbeg} jsx={jgx} jsz=0 gxbeg={gxbeg} gzbeg={szbeg} jgx={jgx} jgz=0 > shots.rsf"
    ))
    print(f"Modeling time for {ns} shots = {time.time()-t_start}")
    if full_shots_out is not None:
        cmd(f"sfcp < shots.rsf > {full_shots_out}")

    # Analyze and filter the generated data set:
    # fix the header and decimate the time sampling jdt (usually 4) times
    cmd(f"sfput < shots.rsf d3={jgx*dx} | sfwindow j1={jdt} | sfbandpass flo=2 fhi=4 > shots_decimated.rsf"
        )
    cmd(f"sfrm shots.rsf")
    # sort into CMP gathers, discarding odd and incomplete CMPs
    cmd(f"sfshot2cmp < shots_decimated.rsf half=n | sfwindow j3=2 f3={ng//2} n3={ns} > {shots_out}"
        )
    print(
        f"sfshot2cmp < shots_decimated.rsf half=n | sfwindow j3=2 f3={ng//2} n3={ns} > {shots_out}"
    )
    # cmd(f"sfrm shots_decimated.rsf")
    # cmd(f"sfrm shots_decimated.rsf")
    # create the logs -- training outputs
    cmd(f"sfsmooth < {model_name} rect2=2 | sfwindow f2={sxbeg} j2={jsx} n2={ns} > {logs_out}"
        )
    #cmd(f"sfin < {logs_out}")
    return 0
from myutils import (cd, cmd, const, elastic_transform, plt_nb_T, toc,
                     aug_flip, merge_dict, np_to_rsf, rsf_to_np, nrms,
                     tf_random_flip_channels)
from myutils import const as c

seed()
# set up matplotlib
matplotlib.rc('image', cmap='RdBu_r')
seaborn.set_context('paper', font_scale=5)

CUDA_VISIBLE_DEVICES = "1,2,3"

os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Madagascar binaries will be stored in DATAPATH (RAM on Linux recommended)
cmd("mkdir /dev/shm/RSFTMP")
cmd("chmod 777 /dev/shm/RSFTMP")
os.environ["DATAPATH"] = "/dev/shm/RSFTMP/"

#%%

alpha_deform = 500
sigma_deform = 50


def generate_model(model_input=c.trmodel,
                   model_output="marm.rsf",
                   dx=c.dx,
                   stretch_X=1,
                   training_flag=False,
                   random_state_number=c.random_state_number,
                   # the remaining parameters and the body are truncated in the
                   # source snippet; distort_flag and crop_flag are inferred
                   # from the call in test_on_model below (defaults assumed)
                   distort_flag=False,
                   crop_flag=True):
    ...
def test_on_model(folder="marmvel1D",
                  net_dict=None,
                  prefix="singleCMP",
                  model_filename=None, 
                  distort_flag=False,
                  stretch_X=None,
                  nCMP_max=nCMP,
                  generate_rsf_data_flag=True,
                  jgx=jgx, sxbeg=sxbeg, gxbeg=gxbeg,
                  X_scaler=X_scaler):
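    """Benchmark the trained networks on a named test model.

    Generates (optionally) the test data, predicts logs with every network
    in net_dict plus a channel-flipped pass for test-time augmentation, and
    plots the ensemble mean, its standard deviation, and the true model.
    """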
    
    if model_filename is None:
        model_filename = f"{folder}.hh"
    
    fig_path = f"../latex/Fig/test_{prefix}_{folder}"
    
    # expand model
    model_output="vel_test.rsf"
    print(model_output)
    vel_test = generate_model(model_input=model_filename, 
                              model_output=model_output, 
                              stretch_X=stretch_X,
                              random_state_number=const.random_state_number,
                              distort_flag=distort_flag,
                              crop_flag=False)
          
    # model data
    if generate_rsf_data_flag:
        cmd(f"mkdir {folder}")
        cmd(f"cp {model_output} {folder}/{model_output}")
        # check stability
        print(f"you chose dt = {dt}, dt < {dx/np.max(vel_test):.4f} should be chosen for stability \n")
        # force stability
        assert dt < dx/np.max(vel_test)
        generate_rsf_data(model_name=f"{folder}/vel_test.rsf", 
                          shots_out=f"{folder}/shots_cmp_test.rsf", 
                          logs_out=f"{folder}/logs_test.rsf")
    
    # read data
    X_data_test, T_data_test = read_rsf_to_np(shots_rsf=f"{folder}/shots_cmp_test.rsf", 
                                              logs_rsf=f"{folder}/logs_test.rsf")
    
    # X_scaled
    X_scaled = scale_X_data(X_data_test, X_scaler)
    
    nCMP = int(net_dict["0"].input.shape[3])
    X_scaled, T_data_test = make_multi_CMP_inputs(X_scaled, T_data_test, nCMP_max)
    sample_reveal = nCMP_max+1
    plt_nb_T(1e3*np.concatenate((np.squeeze(X_scaled[sample_reveal,:,:,-1]), np.flipud(np.squeeze(X_scaled[sample_reveal,:,:,0]))), axis=0),
        title="CMP first | CMP last", dx=200, dz=1e3*dt*jdt, 
        origin_in_middle=True, ylabel="Time(s)", fname=f"{fig_path}_X_scaled", cbar_label = "")
    if nCMP == 1:
        X_scaled = X_scaled[:,:,:,nCMP_max//2:nCMP_max//2+1]
    
    # predict with all networks and save average
    T_pred_total = np.zeros_like(net_dict["0"].predict(X_scaled))    
    T_pred_dict = np.zeros((2*len(net_dict), T_pred_total.shape[0], T_pred_total.shape[1]))
    
    iNet = 0
    for net in net_dict.values():
        # stack predictions from each network and from its channel-flipped
        # input (test-time augmentation) for the ensemble statistics below
        T_pred_tmp = net.predict(X_scaled)
        T_pred_tmp = T_scaler.inverse_transform(T_pred_tmp)
        T_pred_dict[iNet, :, :] = T_pred_tmp
        T_pred_tmp = net.predict(np.flip(X_scaled, axis=3))
        T_pred_tmp = T_scaler.inverse_transform(T_pred_tmp)
        T_pred_dict[iNet + 1, :, :] = T_pred_tmp
        iNet += 2
   
    T_pred = np.mean(T_pred_dict, axis=0)
    variance = np.var(T_pred_dict, axis=0)
    
    
    plt_nb_T(np.sqrt(variance), title="Standard deviation",
             dx=jgx*dx, dz=jlogz*dx,
             fname=f"{fig_path}_inverted_std_dev",
             vmin=0.05, vmax=1)
    
    # plt_nb_T(T_pred-T_data_test, title="Pred-True",
    #          dx=jgx*dx, dz=jlogz*dx,
    #          fname=f"{fig_path}_inverted_std_dev",
    #          vmin=-1, vmax=1)
    
    plt_nb_T(T_pred, title=f"{prefix} estimate, NRMS={nrms(T_pred, T_data_test):.1f}%",
             dx=jgx*dx, dz=jlogz*dx,
             vmin=np.min(1e-3*T_data_test), 
             vmax=np.max(1e-3*T_data_test),
             fname=f"{fig_path}_inverted")
        
    plt_nb_T(T_data_test,
             dx=jgx*dx, dz=jlogz*dx,
             fname=f"{fig_path}_true",
             title=f"True, R2 = {r2_score(T_pred.flatten(), T_data_test.flatten()):.2f}")
    
    print(np.shape(1e3*T_scaled[sample_reveal-(nCMP+1)//2:sample_reveal+(nCMP-1)//2:nCMP]))
#import styler
from myutils import (cd, cmd, const, elastic_transform, plt_nb_T, toc, aug_flip, 
                     merge_dict, np_to_rsf, rsf_to_np, nrms, tf_random_flip_channels)

seed()
# set up matplotlib
matplotlib.rc('image', cmap='RdBu_r')
seaborn.set_context('paper', font_scale=5)

CUDA_VISIBLE_DEVICES = "0,1,2,3"

os.environ["CUDA_VISIBLE_DEVICES"]=CUDA_VISIBLE_DEVICES
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

# Madagascar binaries will be stored in DATAPATH (RAM on Linux recommended)
cmd("mkdir /dev/shm/RSFTMP")
cmd("chmod 777 /dev/shm/RSFTMP")
os.environ["DATAPATH"]="/dev/shm/RSFTMP/"

# execution flags
generate_rsf_data_flag = False
retrain_flag = False  # (sys.argv[1] == "--retrain")
print(f"retrain_flag = {retrain_flag}")
print(type(retrain_flag))
random_model_repeat = 5000
stretch_X_train = 1

tic_total = time.time()

#%% [markdown]
# ## Introduction