def push():
    """Read the project config and hand the data locations to the Redis loaders.

    Resolves the project root and config file via ``project_paths()``, parses
    the YAML config, derives the ``interim_data_dir`` and ``train_data``
    locations relative to the root, and then calls ``images_to_redis`` and
    ``labels_to_redis`` with a Redis client bound to ``localhost:6379``.
    """
    locations = project_paths()
    project_root, config_file = locations['root'], locations['config']

    with config_file.open(mode='r') as stream:
        settings = yaml.load(stream, Loader=yaml.FullLoader)

    # Both locations are resolved before the client is created, so a missing
    # config key surfaces before anything is handed to the loaders.
    images_dir = project_root / settings['interim_data_dir']
    labels_file = project_root / settings['train_data']

    client = redis.Redis(host='localhost', port=6379)

    # The image loader also receives the config file location itself.
    images_to_redis(client, images_dir, config_file)
    labels_to_redis(client, labels_file)
        self.test_folds = []
        ids = json.loads(redis_db.get('grapheme_ids'))
        random.shuffle(ids)
        kf = KFold(n_splits=folds)
        output_type = (tf.int32, tf.float32, tf.float32)
        for train_idx, test_ids in kf.split(ids):
            ds1 = data_generator(redis_db, [ids[idx] for idx in train_idx])
            ds2 = data_generator(redis_db, [ids[idx] for idx in train_idx])
            dsg1 = tf.data.Dataset.from_generator(lambda: ds1, output_type)
            dsg2 = tf.data.Dataset.from_generator(lambda: ds2, output_type)
            self.train_folds.append(dsg1)
            self.test_folds.append(dsg2)


if __name__ == '__main__':
    # Manual smoke run: load the project config, connect to the local Redis
    # instance and build a generator over every stored grapheme id.
    paths = project_paths()
    root_path, config_path = paths['root'], paths['config']

    with config_path.open(mode='r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    r = redis.Redis(host='localhost', port=6379)

    # 'grapheme_ids' is read back as a JSON document and passed to the
    # generator unchanged.
    raw_ids = r.get('grapheme_ids')
    ids = json.loads(raw_ids)
    dataset = data_generator(r, ids)

    # for x in range(4):
    #     print(next(dataset))
Exemple #3
0
                            processed_samples:processed_samples + file_size,
                            1:].to_numpy()
                        np_ids = df.iloc[processed_samples:processed_samples +
                                         file_size, 0].to_numpy()
                        np.savez(output_file, ids=np_ids, images=np_samples)
                        print(np_ids.shape, np_samples.shape)
                        processed_samples += rows
                        file_idx += 1

                    print("Complete.")

    return


if __name__ == '__main__':
    # Command-line entry point: read the project config, then parse the
    # conversion options from the command line.
    CONFIG_PATH = project_paths()["config"]

    with CONFIG_PATH.open(mode='r') as stream:
        config = yaml.load(stream, Loader=yaml.FullLoader)

    # The configured row-group size is used both when -r is omitted (default)
    # and when -r is given without a value (const).
    row_group_size = int(config['row_group_size'])

    parser = argparse.ArgumentParser()
    parser.add_argument('prefix', nargs='?', type=str, default='train')
    for short_flag, long_flag in (('-p', '--parquet'), ('-n', '--numpy')):
        parser.add_argument(short_flag, long_flag, action="store_true")
    parser.add_argument(
        '-r', '--rows',
        type=int,
        nargs='?',
        const=row_group_size,
        default=row_group_size,
    )
    args = parser.parse_args()
Exemple #4
0
    max_vd_len = 0
    max_cd_len = 0

    with label_path.open(mode='r') as label_csv:

        csv_reader = csv.reader(label_csv)
        next(csv_reader)
        for row in csv_reader:

            if row[0] == 'grapheme_root':
                print(len(row[2]), "yo")
                max_gr_len = max(max_gr_len, len(row[2]))

            if row[0] == 'vowel_diacritic':
                print("here")
                max_vd_len = max(max_vd_len, len(row[2]))

            if row[0] == 'consonant_diacritic':
                max_cd_len = max(max_cd_len, len(row[2]))

    return max_gr_len, max_vd_len, max_cd_len


if __name__ == '__main__':
    # Locate class_map.csv relative to the project's data directory instead of
    # a developer-specific absolute path, so the script runs on any checkout.
    data = project_paths()["data"] / 'raw' / 'class_map.csv'

    # Pass the path object itself: max_char_length appears to call .open() on
    # its argument (TODO confirm against its definition), which a plain string
    # does not support.
    print(max_char_length(data))