## Load the preprocessed text artifacts produced by the encoding pipeline.
id_label = np.load(os.path.join(cf.DATA_DIR, 'mnp.npy'))                # IDs (filenames)
descriptions = np.load(os.path.join(cf.DATA_DIR, 'dnp.npy'))            # description strings
description_vectors = np.load(os.path.join(cf.DATA_DIR, 'vnp.npy'))     # vectors encoded
padded_encoded_vector = np.load(os.path.join(cf.DATA_DIR, 'pnp.npy'))   # padded encoded

# Rebuild the train/val split only when the cached split files are missing
# from DATA_DIR; otherwise reuse them so every run sees the same split.
data_from_scratch = not ut.check_for_datafiles(
    cf.DATA_DIR,
    ['train_txt_data.npy', 'val_txt_data.npy', 'all_txt_data.npy'])
#data_from_scratch = True   # uncomment to force a fresh split

# Seed both RNGs so the shuffle/split below is reproducible across runs.
random.seed(488)
tf.random.set_seed(488)

if data_from_scratch:
    # create: shuffle and split the padded text vectors.
    # (Removed a leftover glob over cf.IMAGE_FILEPATH here — its result was
    # never used in this text-data branch; the split operates on
    # `padded_encoded_vector` only.)
    # NOTE(review): `cf_val_frac` is used as a bare name — presumably the
    # validation fraction (vs. e.g. cf.VAL_FRAC); confirm it is defined
    # earlier in the file.
    train_data, val_data, all_data = ut.split_shuffle_data(
        padded_encoded_vector, cf_val_frac)
    # Save base train data to file so subsequent runs take the `else` branch.
    np.save(os.path.join(cf.DATA_DIR, 'train_txt_data.npy'), train_data, allow_pickle=True)
    np.save(os.path.join(cf.DATA_DIR, 'val_txt_data.npy'), val_data, allow_pickle=True)
    np.save(os.path.join(cf.DATA_DIR, 'all_txt_data.npy'), all_data, allow_pickle=True)
    # also save the vectors we are fitting to
else:
    # load: reuse the previously cached split.
    print(f"loading train/validate data from {cf.DATA_DIR}")
    train_data = np.load(os.path.join(cf.DATA_DIR, 'train_txt_data.npy'), allow_pickle=True)
    val_data = np.load(os.path.join(cf.DATA_DIR, 'val_txt_data.npy'), allow_pickle=True)
    all_data = np.load(os.path.join(cf.DATA_DIR, 'all_txt_data.npy'), allow_pickle=True)
## ## LOAD/PREP data ## - l if we've already been through this for the current database we'll load... otherwise process. ##################################################### data_from_scratch = not ut.check_for_datafiles( cf.DATA_DIR, ['train_data.npy', 'val_data.npy', 'all_data.npy']) #data_from_scratch = True random.seed(488) tf.random.set_seed(488) if data_from_scratch: #create files = glob.glob(os.path.join(cf.IMAGE_FILEPATH, "*/img/*")) files = np.asarray(files) train_data, val_data, all_data = ut.split_shuffle_data(files, cf_val_frac) # Save base train data to file np.save(os.path.join(cf.DATA_DIR, 'train_data.npy'), train_data, allow_pickle=True) np.save(os.path.join(cf.DATA_DIR, 'val_data.npy'), val_data, allow_pickle=True) np.save(os.path.join(cf.DATA_DIR, 'all_data.npy'), all_data, allow_pickle=True) else: #load print(f"loading train/validate data from {cf.DATA_DIR}") train_data = np.load(os.path.join(cf.DATA_DIR, 'train_data.npy'), allow_pickle=True)