Example #1
def load_model_pair(model_cluster, model_name):
    # Locate the saved real/fake model files for this cluster/video pair.
    real_file = get_model_dir() / f"c_{model_cluster}_{model_name}_real.h5"
    fake_file = get_model_dir() / f"c_{model_cluster}_{model_name}_fake.h5"
    assert real_file.is_file() and fake_file.is_file(), "missing model file(s)"

    model_real = load_model(real_file)
    model_fake = load_model(fake_file)

    return model_real, model_fake
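
# Usage sketch (assumption, not part of the original snippet): get_model_dir()
# must already contain the matching pair; the cluster id and video name below
# are illustrative values taken from the l_m list further down.
#
#   model_real, model_fake = load_model_pair("200", "qhhkcsvlod")
#   model_real.summary()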
def model_pairs():
    # Scan the model directory and return one (name, cluster) row per video
    # that has both a real and a fake model file.
    model_dir = get_model_dir()

    assert model_dir.is_dir()

    # Keep only the .h5 model files, sorted for determinism.
    model_files = sorted(x.name for x in model_dir.iterdir() if x.suffix == ".h5")

    # File names look like "c_<cluster>_<name>_<real|fake>.h5":
    # strip the "c_" prefix, then split out the three fields.
    model_files = [x[2:] for x in model_files]

    cluster = [x.split("_")[0] for x in model_files]
    name = [x.split("_")[1] for x in model_files]
    realfake = [x.split("_")[2][:-3] for x in model_files]  # drop ".h5"

    df_m = pd.DataFrame({'name': name, 'cluster': cluster, 'rf' : realfake})

    # Count files per name; names with fewer than two files lack one half
    # of the real/fake pair.
    sCount = df_m.groupby('name').size()

    df_m = df_m.assign(numfiles = df_m.name.map(sCount))


    m_single = df_m.numfiles == 1

    print(f"Dropping singles {m_single.sum()}")

    df_m = df_m[~m_single].reset_index(drop=True)

    df_m = df_m.drop(['rf', 'numfiles'], axis=1)

    # Each remaining name appears twice (real + fake); keep one row per name.
    m = df_m.duplicated(subset='name')

    df_m = df_m[~m].reset_index(drop=True)

    return df_m
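
# How the two helpers compose (a sketch, not from the original snippet):
# retrain any missing pairs for every deduplicated (cluster, name) row.
#
#   for row in model_pairs().itertuples(index=False):
#       process_videoset(row.cluster, row.name)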
def process_videoset(iCluster, original):
    input_dir = get_ready_data_dir()
    output_dir = get_model_dir()

    input_df = input_dir / f"c_{iCluster}_{original}.pkl"
    input_npy = input_dir / f"c_{iCluster}_{original}.npy"

    isInputExisting = input_df.is_file() and input_npy.is_file()

    if not isInputExisting:
        # print (f"Missing input for {iCluster}_{original}")
        return

    output_model_real = output_dir / f"c_{iCluster}_{original}_real.h5"
    output_model_fake = output_dir / f"c_{iCluster}_{original}_fake.h5"

    isOutputExisting = output_model_real.is_file() and output_model_fake.is_file()

    if isOutputExisting:
        print(f"{iCluster}_{original} already created")
        return

    print(f"Processing c_{iCluster}_{original}...")

    df = pd.read_pickle(input_df)

    data = np.load(input_npy)

    # Boolean row masks over `data`, aligned with the rows of `df`.
    m_fake = (df.fake == True)
    m_real = (df.fake == False)

    mse_fake, model_fake = train_model(data[m_fake])
    mse_real, model_real = train_model(data[m_real])

    print(f"c_{iCluster}_{original}: mse_fake {mse_fake} mse_real {mse_real}")

    model_fake.save(output_model_fake)
    model_real.save(output_model_real)
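
# train_model is not defined in this snippet. A minimal sketch of a
# compatible implementation (an assumption, not the original): a small Keras
# dense autoencoder that returns its final reconstruction MSE and the model.
def train_model(data):
    # Sketch only: assumes `data` is a 2-D float array of shape (rows, features).
    from tensorflow.keras import layers, models

    n_features = data.shape[1]
    model = models.Sequential([
        layers.Dense(32, activation="relu", input_shape=(n_features,)),
        layers.Dense(n_features),
    ])
    model.compile(optimizer="adam", loss="mse")
    history = model.fit(data, data, epochs=10, batch_size=64, verbose=0)
    return history.history["loss"][-1], model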
Example #4

if isKaggle:
    os.chdir('/kaggle/working')

l_m = [('200', 'qhhkcsvlod'), ('201', 'ahkibiituu'), ('201', 'ajconjiwey'), ('210', 'dfembozird'), ('210', 'lhtohlvehk'), ('210', 'yrqhcjnpix'), ('211', 'copowfosob'), ('211', 'ctlqptsltq'), ('211', 'ddqybqgnkl'), ('220', 'aguxjvffln'), ('220', 'akmkangqbj'), ('220', 'aqtypfezoi'), ('220', 'biotzvraxy'), ('220', 'bthweewuqp'), ('220', 'bwvmskoriy'), ('220', 'cyzgavhyiv')]


l_models = []

for cluster, name in l_m:
    model_real, model_fake = load_model_pair(cluster, name)
    l_models.append((model_real, model_fake))


# Load the stage-2 classifier; a context manager ensures the file is closed.
with open(get_model_dir() / "finalized_model.sav", 'rb') as f:
    model_stage2 = pickle.load(f)


input_dir = get_test_dir()

model_dir = get_model_dir()
submission_dir = get_submission_dir()


mtcnn_detector = MTCNNDetector()

l_files = sorted(input_dir.iterdir())

l_filenames = [str(x.name) for x in l_files]

d_res = {}
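
# The snippet ends before the scoring loop. A hedged sketch of the intended
# two-stage flow: extract faces per video, score with each real/fake model
# pair, then let model_stage2 turn the feature vector into a probability.
# score_video is a hypothetical helper, not part of the original code.
#
#   for filename in l_filenames:
#       features = score_video(input_dir / filename, mtcnn_detector, l_models)
#       d_res[filename] = model_stage2.predict_proba([features])[0][1]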
Example #5
df.to_pickle(get_meta_dir() / "df_tgs.pkl")

idx_train = np.where(m_train)[0]

# TODO: seed the RNG so the shuffle is reproducible
np.random.shuffle(idx_train)

num_max_files_per_run = 7000

# Enough chunks that each holds at most num_max_files_per_run indices.
num_splits = int(1 + idx_train.shape[0] / num_max_files_per_run)

l_idx_train = np.array_split(idx_train, num_splits)
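
# np.array_split (unlike np.split) accepts a count that doesn't divide the
# array evenly; trailing chunks are simply one element shorter, e.g.:
#   np.array_split(np.arange(5), 2)  ->  [array([0, 1, 2]), array([3, 4])]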

z_model_name = "my_keras"
checkpoint_path = str(get_model_dir() / f"{z_model_name}.model")

K.clear_session()

model = get_unet_resnet(input_shape=(img_size_target, img_size_target, 3))

# To resume from a checkpoint instead, reload it with the custom loss and
# metric registered, e.g.:
#from keras.models import load_model
#model = load_model(checkpoint_path,
#                   custom_objects={'bce_dice_loss': bce_dice_loss,
#                                   'my_iou_metric': my_iou_metric})

model.compile(loss=bce_dice_loss, optimizer='adam', metrics=[my_iou_metric])

model_checkpoint = ModelCheckpoint(checkpoint_path,
                                   monitor='val_my_iou_metric',
                                   mode='max',
                                   save_best_only=True)
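
# The snippet is truncated here. The checkpoint would typically be passed to
# fit() via callbacks; the training arrays below are illustrative placeholders.
#
#   model.fit(x_train, y_train,
#             validation_data=(x_valid, y_valid),
#             epochs=50, batch_size=32,
#             callbacks=[model_checkpoint])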
Example #6
    # The label is the fifth "_"-separated token of each file stem.
    l_target = [str(x.stem).split("_")[4] for x in l_files]

    # Note: zip() is a lazy iterator; wrap it in list() if the caller needs
    # to traverse the pairs more than once.
    filetuple = zip(l_files, l_target)

    return filetuple


zTime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

log_dir_real = get_log_dir() / (zTime + "_real")
log_dir_fake = get_log_dir() / (zTime + "_fake")

#file_writer_real = tf.summary.create_file_writer(str(log_dir_real))
#file_writer_fake = tf.summary.create_file_writer(str(log_dir_fake))

model_dir = get_model_dir()

num_timesteps = 32

nTrainLimit = 0


class TestHub:
    def __init__(self, l_test_path):
        self.l_test_file = [x[0] for x in l_test_path]
        self.l_test_target = [x[1] for x in l_test_path]

        for x in self.l_test_file:
            assert x.is_file()

        s = pd.Series(self.l_test_target)

    s_data.append(aF)

    s_name.append(sample_lineset['name'])
    s_cluster.append(sample_lineset['cluster'])
    s_y.append(False)
    s_data.append(aT)



df_meta = pd.DataFrame({'cluster': s_cluster, 'name': s_name, 'y': s_y})

df = pd.concat([df_meta, pd.DataFrame(np.stack(s_data))], axis=1)

df = df.assign(cluster=df.cluster.astype(int))

df.to_pickle(get_model_dir() / "stage2.pkl")


l_cluster_valid = [11, 100, 31, 120, 50, 30, 180, 130, 0, 10]

# Hold out a fixed set of clusters for validation.
df = df.assign(validation=df.cluster.isin(l_cluster_valid))


y = df.y

validation = df.validation

df = df.drop(['cluster', 'name', 'validation', 'y'], axis=1)

X = np.array(df)
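
# Sketch (assumption): the pickled "finalized_model.sav" loaded in an earlier
# snippet suggests a scikit-learn stage-2 classifier. LogisticRegression is
# an illustrative stand-in, fit on the non-validation clusters only.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(X[~validation], y[~validation])
print("validation accuracy:", clf.score(X[validation], y[validation]))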