def load_model_pair(model_cluster, model_name):
    real_file = get_model_dir() / f"c_{model_cluster}_{model_name}_real.h5"
    fake_file = get_model_dir() / f"c_{model_cluster}_{model_name}_fake.h5"
    assert real_file.is_file() and fake_file.is_file()

    model_real = load_model(real_file)
    model_fake = load_model(fake_file)

    return model_real, model_fake
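# Illustrative helper (not from the original source): load one real/fake pair
# and print its architecture. The default ('200', 'qhhkcsvlod') combination is
# taken from the l_m list used by the inference code further down.
def describe_model_pair(model_cluster='200', model_name='qhhkcsvlod'):
    model_real, model_fake = load_model_pair(model_cluster, model_name)
    model_real.summary()
    model_fake.summary()
    return model_real, model_fake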
def model_pairs():
    model_dir = get_model_dir()
    assert model_dir.is_dir()

    # Collect model filenames of the form c_{cluster}_{name}_{real|fake}.h5
    model_files = sorted(model_dir.iterdir())
    model_files = [x.name for x in model_files if x.suffix == ".h5"]
    model_files = [x[2:] for x in model_files]               # strip the leading "c_"

    cluster  = [x.split("_")[0] for x in model_files]
    name     = [x.split("_")[1] for x in model_files]
    realfake = [x.split("_")[2][:-3] for x in model_files]   # "real.h5"/"fake.h5" -> "real"/"fake"

    df_m = pd.DataFrame({'name': name, 'cluster': cluster, 'rf': realfake})

    # Keep only names for which both the real and the fake model exist.
    sCount = df_m.groupby('name').size()
    df_m = df_m.assign(numfiles=df_m.name.map(sCount))
    m_single = df_m.numfiles == 1
    print(f"Dropping singles {m_single.sum()}")
    df_m = df_m[~m_single].reset_index(drop=True)

    # Reduce to one row per (cluster, name) pair.
    df_m = df_m.drop(['rf', 'numfiles'], axis=1)
    m = df_m.duplicated(subset='name')
    df_m = df_m[~m].reset_index(drop=True)

    return df_m
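# Illustrative wiring (an assumption, not shown in the original source): walk
# the deduplicated (name, cluster) table returned by model_pairs() and load
# every real/fake pair with load_model_pair().
def load_all_model_pairs():
    d_pairs = {}
    df_pairs = model_pairs()
    for _, row in df_pairs.iterrows():
        d_pairs[row['name']] = load_model_pair(row['cluster'], row['name'])
    return d_pairs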
def process_videoset(iCluster, original):
    input_dir = get_ready_data_dir()
    output_dir = get_model_dir()

    input_df  = input_dir / f"c_{iCluster}_{original}.pkl"
    input_npy = input_dir / f"c_{iCluster}_{original}.npy"

    isInputExisting = input_df.is_file() and input_npy.is_file()
    if not isInputExisting:
        # print(f"Missing input for {iCluster}_{original}")
        return

    output_model_real = output_dir / f"c_{iCluster}_{original}_real.h5"
    output_model_fake = output_dir / f"c_{iCluster}_{original}_fake.h5"

    isOutputExisting = output_model_real.is_file() and output_model_fake.is_file()
    if isOutputExisting:
        print(f"{iCluster}_{original} already created")
        return

    print(f"Processing c_{iCluster}_{original}...")

    df = pd.read_pickle(input_df)
    data = np.load(input_npy)

    m_fake = (df.fake == True)
    m_real = (df.fake == False)

    mse_fake, model_fake = train_model(data[m_fake])
    mse_real, model_real = train_model(data[m_real])

    print(f"c_{iCluster}_{original}: mse_fake {mse_fake} mse_real {mse_real}")

    model_fake.save(output_model_fake)
    model_real.save(output_model_real)
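# Hypothetical driver (not shown in the original source): scan the ready-data
# directory for c_{cluster}_{original}.npy files and train any pair that is
# still missing. process_videoset() itself skips combinations whose models
# already exist, so re-running this is cheap.
def process_all_videosets():
    for npy_path in sorted(get_ready_data_dir().glob("c_*.npy")):
        _, iCluster, original = npy_path.stem.split("_", 2)
        process_videoset(iCluster, original)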
    return acDiffReal


if isKaggle:
    os.chdir('/kaggle/working')

l_m = [('200', 'qhhkcsvlod'), ('201', 'ahkibiituu'), ('201', 'ajconjiwey'),
       ('210', 'dfembozird'), ('210', 'lhtohlvehk'), ('210', 'yrqhcjnpix'),
       ('211', 'copowfosob'), ('211', 'ctlqptsltq'), ('211', 'ddqybqgnkl'),
       ('220', 'aguxjvffln'), ('220', 'akmkangqbj'), ('220', 'aqtypfezoi'),
       ('220', 'biotzvraxy'), ('220', 'bthweewuqp'), ('220', 'bwvmskoriy'),
       ('220', 'cyzgavhyiv')]

l_models = []
for x in l_m:
    model_real, model_fake = load_model_pair(x[0], x[1])
    l_models.append((model_real, model_fake))

model_stage2 = pickle.load(open(get_model_dir() / "finalized_model.sav", 'rb'))

input_dir = get_test_dir()
model_dir = get_model_dir()
submission_dir = get_submission_dir()

mtcnn_detector = MTCNNDetector()

l_files = list(sorted(input_dir.iterdir()))
l_filenames = [str(x.name) for x in l_files]

d_res = {}
df.to_pickle(get_meta_dir() / "df_tgs.pkl")

idx_train = np.where(m_train)[0]

# Todo: seed
np.random.shuffle(idx_train)

num_max_files_per_run = 7000
num_splits = int(1 + idx_train.shape[0] / num_max_files_per_run)
l_idx_train = np.array_split(idx_train, num_splits)

z_model_name = "my_keras"
checkpoint_path = str(get_model_dir() / f"{z_model_name}.model")

K.clear_session()

model = get_unet_resnet(input_shape=(img_size_target, img_size_target, 3))

# from keras.models import load_model
# model = load_model(checkpoint_path)   # add custom object function

model.compile(loss=bce_dice_loss, optimizer='adam', metrics=[my_iou_metric])

model_checkpoint = ModelCheckpoint(checkpoint_path,
                                   monitor='val_my_iou_metric',
                                   mode='max',
                                   save_best_only=True,
    l_target = [str(x.stem).split("_")[4] for x in l_files]

    filetuple = zip(l_files, l_target)

    return filetuple


zTime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

log_dir_real = get_log_dir() / (zTime + "_real")
log_dir_fake = get_log_dir() / (zTime + "_fake")

# file_writer_real = tf.summary.create_file_writer(str(log_dir_real))
# file_writer_fake = tf.summary.create_file_writer(str(log_dir_fake))

model_dir = get_model_dir()

num_timesteps = 32
nTrainLimit = 0


class TestHub:

    def __init__(self, l_test_path):
        self.l_test_file = [x[0] for x in l_test_path]
        self.l_test_target = [x[1] for x in l_test_path]

        for x in self.l_test_file:
            assert x.is_file()

        s = pd.Series(self.l_test_target)
    s_data.append(aF)

    s_name.append(sample_lineset['name'])
    s_cluster.append(sample_lineset['cluster'])
    s_y.append(False)

    s_data.append(aT)

df_meta = pd.DataFrame({'cluster': s_cluster, 'name': s_name, 'y': s_y})

df = pd.concat([df_meta, pd.DataFrame(np.stack(s_data))], axis=1)
df = df.assign(cluster=df.cluster.astype(int))

df.to_pickle(get_model_dir() / "stage2.pkl")

l_cluster_valid = [11, 100, 31, 120, 50, 30, 180, 130, 0, 10]
df = df.assign(validation=df.cluster.isin(l_cluster_valid))

y = df.y
validation = df.validation

df = df.drop(['cluster', 'name', 'validation', 'y'], axis=1)
X = np.array(df)
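# Stage-2 training sketch. The code above only builds the feature matrix X,
# the labels y and the cluster-based validation mask; the classifier below
# (scikit-learn logistic regression) is an assumption used as a placeholder.
# The output filename matches the "finalized_model.sav" that the inference
# code unpickles into model_stage2.
import pickle
from sklearn.linear_model import LogisticRegression

X_train, y_train = X[~validation.values], y[~validation]
X_valid, y_valid = X[validation.values], y[validation]

clf_stage2 = LogisticRegression(max_iter=1000)   # placeholder model choice
clf_stage2.fit(X_train, y_train)
print(f"stage-2 validation accuracy: {clf_stage2.score(X_valid, y_valid):.3f}")

with open(get_model_dir() / "finalized_model.sav", "wb") as f:
    pickle.dump(clf_stage2, f)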