def test_numpy_format():
    """Check that arrays loaded from the disk cache own their memory.

    Inspects the first array of the first subsong of the first song and
    verifies that it is a standalone array rather than a view.
    """
    maybe_build_index()
    td = TrainingData('pcode_abs')
    td.load_disk_cache(TMP_DIR, 150)

    first_arr = td.songs[0][1][0][0]
    assert first_arr.flags['OWNDATA']
    # Compare against None instead of truth-testing: `not arr.base`
    # raises ValueError when base is a multi-element ndarray, which
    # would hide the real failure behind an unrelated exception.
    assert first_arr.base is None
def test_random_rel_ofs():
    """Check that random relative offsets leave room for a full window.

    Draws ten offsets from a single loaded module and verifies that a
    window of 10 000 items starting at each offset fits inside the
    array the offset refers to.
    """
    window = 10_000
    td = TrainingData('pcode_abs')
    td.load_mod_file(TEST_PATH / 'zodiak_-_gasp.mod')
    for _ in range(10):
        s_i, ss_i, t_i, o = random_rel_ofs(td, window)
        # Each entry of td.songs is a (name, subsongs) pair, so the
        # subsong list lives at index 1. Indexing with ss_i directly
        # would pick characters out of the file name.
        transp = td.songs[s_i][1][ss_i][t_i]
        # NOTE(review): assumes the second argument of random_rel_ofs
        # is the length of the window that must fit after the offset
        # -- confirm against its definition.
        assert o + window <= len(transp)
def test_split():
    """Check that a three-way split keeps every song and copies arrays.

    The three parts together must contain as many songs as the source
    data set, and the first array in the training part must own its
    memory instead of being a view.
    """
    maybe_build_index()
    td = TrainingData('pcode_abs')
    td.load_disk_cache(TMP_DIR, 150)

    train, valid, test = td.split_3way(0.8, 0.1)
    n_split = len(train.songs) + len(valid.songs) + len(test.songs)
    assert n_split == len(td.songs)

    first_arr = train.songs[0][1][0][0]
    assert first_arr.flags['OWNDATA']
    # Compare against None instead of truth-testing: `not arr.base`
    # raises ValueError when base is a multi-element ndarray.
    assert first_arr.base is None
def test_pcode_rel():
    """Check song and subsong counts for the 'pcode_rel' disk cache."""
    maybe_build_index()
    data = TrainingData('pcode_rel')
    data.load_disk_cache(TMP_DIR, 150)
    assert len(data.songs) == 26

    # Songs listed here have several subsongs; all others have one.
    expected_counts = {
        'beast2-ingame-st.mod' : 5,
        'entity.mod' : 2
    }
    for song_name, song_subsongs in data.songs:
        expected = expected_counts.get(song_name, 1)
        assert len(song_subsongs) == expected
def test_pcode_td():
    """Check the 'pcode_abs' encoding of a single loaded module.

    Verifies the song name, the total length of the arrays in the first
    subsong, the pitch instructions decoded from the start of the first
    array, and that the remaining arrays have the same length as the
    first one but different contents.
    """
    data = TrainingData('pcode_abs')
    data.load_mod_file(TEST_PATH / 'zodiak_-_gasp.mod')

    assert len(data.songs) == 1
    song_name, song_subsongs = data.songs[0][0], data.songs[0][1]
    assert song_name == 'zodiak_-_gasp.mod'

    arrays = song_subsongs[0]
    total_len = sum(arr.shape[0] for arr in arrays)
    assert total_len == 13423 * 5

    # Only the pitch instructions among the first 120 tokens matter.
    decoded = data.encoder.decode_chars(arrays[0][:120])
    pitches = [(cmd, arg) for (cmd, arg) in decoded if cmd == INSN_PITCH]
    assert pitches == [('P', 24), ('P', 24), ('P', 27), ('P', 27), ('P', 24)]

    reference = arrays[0]
    expected_len = len(reference)
    for other in arrays[1:]:
        assert len(other) == expected_len
        assert not np.array_equal(other, reference)
def main():
    """Command-line entry point producing the statistics plots.

    Parses ``<root-path>``, ``<model>`` and ``--verbose`` with docopt,
    loads the training data for the model's code type from the disk
    cache, and hands the token distribution and loss plot helpers their
    output paths inside the ``stats`` directory under the root path.
    """
    # Command line handling
    cli = docopt(__doc__, version = 'Model stats 1.0')
    SP.enabled = cli['--verbose']
    root_dir = Path(cli['<root-path>'])

    # The generator settings decide which kind of code is loaded
    generator = get_code_generator(cli['<model>'])
    code_type = generator['code-type']

    data = TrainingData(code_type)
    data.load_disk_cache(root_dir, 150)

    # All plots end up in <root-path>/stats
    stats_dir = root_dir / 'stats'
    stats_dir.mkdir(exist_ok = True)

    tokens_png = stats_dir / ('tokens-%s.png' % code_type)
    token_distribution_plot(data, tokens_png)

    # The loss plot is built from the log file in <root-path>/weights
    loss_log = root_dir / 'weights' / log_file(generator)
    loss_png = stats_dir / ('loss-%s.png' % file_stem(generator))
    loss_plot(loss_log, loss_png)
def test_save_generated_sequences():
    """Check that saving one generated sequence writes one pickle file.

    The first four dash-separated fields of the file name must parse as
    non-negative integers.
    """
    generator = get_code_generator('orig-pcode')

    # Start from an empty output directory.
    out_dir = TMP_DIR / 'generated'
    if out_dir.exists():
        rmtree(out_dir)
    out_dir.mkdir(parents = True)

    data = TrainingData('pcode_abs')
    data.load_disk_cache(TMP_DIR, 150)

    start_offsets = [random_rel_ofs(data, 100)]
    sequences = [np.array([1, 2, 3, 4, 5])]
    seq_log_probs = [-100]
    sampling = [('top-p', 0.98)]
    save_generated_sequences(generator, out_dir, data,
                             sequences, start_offsets,
                             seq_log_probs, sampling)

    saved = list(out_dir.glob('*.pickle.gz'))
    assert len(saved) == 1

    name_fields = saved[0].stem.split('-')[:4]
    numbers = list(map(int, name_fields))
    assert all(number >= 0 for number in numbers)
def test_dcode():
    """Check the number of songs in the 'dcode' disk cache."""
    maybe_build_index()
    data = TrainingData('dcode')
    data.load_disk_cache(TMP_DIR, 150)
    n_songs = len(data.songs)
    assert n_songs == 26
def test_histogram():
    """Smoke test: print the histogram for a single loaded module."""
    mod_path = TEST_PATH / 'im_a_hedgehog.mod'
    data = TrainingData('pcode_abs')
    data.load_mod_file(mod_path)
    print_histogram(data)
def test_abs_to_rel_ofs():
    """Check the relative offset computed for absolute offset 36000."""
    data = TrainingData('pcode_abs')
    data.load_mod_file(TEST_PATH / 'zodiak_-_gasp.mod')

    expected = (0, 0, 2, 9154)
    actual = abs_ofs_to_rel_ofs(data, 36000)
    assert actual == expected