def push():
    """Load the project config and push training images and labels into Redis.

    Resolves project paths, parses the YAML config to locate the interim
    image directory and the label file, then streams both into a local
    Redis instance via images_to_redis / labels_to_redis.
    """
    paths = project_paths()
    root_path = paths['root']
    config_path = paths['config']

    # Parse the project configuration (local, trusted file).
    with config_path.open(mode='r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    train_path = root_path / config['interim_data_dir']
    label_path = root_path / config['train_data']

    # NOTE(review): host/port are hard-coded to a default local instance.
    r = redis.Redis(host='localhost', port=6379)
    images_to_redis(r, train_path, config_path)
    labels_to_redis(r, label_path)
self.test_folds = [] ids = json.loads(redis_db.get('grapheme_ids')) random.shuffle(ids) kf = KFold(n_splits=folds) output_type = (tf.int32, tf.float32, tf.float32) for train_idx, test_ids in kf.split(ids): ds1 = data_generator(redis_db, [ids[idx] for idx in train_idx]) ds2 = data_generator(redis_db, [ids[idx] for idx in train_idx]) dsg1 = tf.data.Dataset.from_generator(lambda: ds1, output_type) dsg2 = tf.data.Dataset.from_generator(lambda: ds2, output_type) self.train_folds.append(dsg1) self.test_folds.append(dsg2) if __name__ == '__main__': paths = project_paths() root_path = paths['root'] config_path = paths['config'] with config_path.open(mode='r') as f: config = yaml.load(f, Loader=yaml.FullLoader) r = redis.Redis( host='localhost', port=6379 ) ids = json.loads(r.get('grapheme_ids')) dataset = data_generator(r, ids) # for x in range(4): # print(next(dataset))
processed_samples:processed_samples + file_size, 1:].to_numpy() np_ids = df.iloc[processed_samples:processed_samples + file_size, 0].to_numpy() np.savez(output_file, ids=np_ids, images=np_samples) print(np_ids.shape, np_samples.shape) processed_samples += rows file_idx += 1 print("Complete.") return if __name__ == '__main__': CONFIG_PATH = project_paths()["config"] with CONFIG_PATH.open(mode='r') as config_file: config = yaml.load(config_file, Loader=yaml.FullLoader) parser = argparse.ArgumentParser() parser.add_argument('prefix', nargs='?', type=str, default='train') parser.add_argument('-p', '--parquet', action="store_true") parser.add_argument('-n', '--numpy', action="store_true") parser.add_argument('-r', '--rows', type=int, nargs='?', const=int(config['row_group_size']), default=int(config['row_group_size'])) args = parser.parse_args()
max_vd_len = 0 max_cd_len = 0 with label_path.open(mode='r') as label_csv: csv_reader = csv.reader(label_csv) next(csv_reader) for row in csv_reader: if row[0] == 'grapheme_root': print(len(row[2]), "yo") max_gr_len = max(max_gr_len, len(row[2])) if row[0] == 'vowel_diacritic': print("here") max_vd_len = max(max_vd_len, len(row[2])) if row[0] == 'consonant_diacritic': max_cd_len = max(max_cd_len, len(row[2])) return max_gr_len, max_vd_len, max_cd_len if __name__ == '__main__': data = project_paths()["data"] / 'raw' / 'class_map.csv' print(max_char_length( '/home/scott/Projects' '/kaggle__bengaliai_handwritten_grapheme_classification/data/raw' '/class_map' '.csv'))