def draw_plagiarism(lo_ngram, hi_ngram, code_gen, measurements, training_data):
    """Report how often sampled generated n-grams are found in the training data.

    For every n-gram length in ``range(lo_ngram, hi_ngram)`` a fixed number
    of samples is drawn from the generated sequences with ``sample_seq``,
    ``find_samples`` counts the matches against the flattened training
    tokens, and the match ratio is logged. The ratios, keyed by n-gram
    length, are printed as a dict.

    Args:
        lo_ngram: First n-gram length to test (inclusive).
        hi_ngram: End of the n-gram length range (exclusive).
        code_gen: Key selecting an entry in the unpickled measurements.
        measurements: Argument for ``load_pickle`` giving the measurements.
        training_data: Argument for ``load_pickle`` giving the training data.
    """
    n_samples = 1000

    SP.print('Loading training data...')
    td = load_pickle(training_data)
    # Strip names: each entry of td[1] is a (name, content) pair.
    songs = [content for (_name, content) in td[1]]

    SP.print('Flattening %d songs...' % len(songs))
    # Three levels of nesting are removed to reach the token stream.
    tokens = songs
    for _ in range(3):
        tokens = flatten(tokens)
    tokens = np.array(tokens, dtype=np.uint16)

    SP.print('Loading samples...')
    data = load_pickle(measurements)
    generated = data[code_gen][False]
    # Only the first element of every stored value is used as a sequence.
    seqs = [entry[0] for entry in list(generated.values())]

    plag_ratios = {}
    for ngram in range(lo_ngram, hi_ngram):
        SP.header('FINDING MATCHES FOR NGRAMS OF LENGTH %d' % ngram)
        samples = []
        for _ in range(n_samples):
            samples.append(sample_seq(seqs, ngram))
        n_matches = find_samples(tokens, samples)
        frac = n_matches / n_samples
        SP.print('%d samples matches, %.2f%%.' % (n_matches, 100 * frac))
        SP.leave()
        plag_ratios[ngram] = frac
    # NOTE(review): printed once, after all lengths are processed — confirm.
    print(plag_ratios)
def percussive_samples(mod):
    """Return the set of samples in *mod* whose properties are percussive.

    The subsongs of the module are linearized, their rows converted to
    notes using the per-sample header volumes, and ``sample_props`` is
    queried; every key whose property object has a truthy
    ``is_percussive`` is included in the result.
    """
    rows = flatten(
        subsong_rows for (_, subsong_rows) in linearize_subsongs(mod, 1))
    volumes = [hdr.volume for hdr in mod.sample_headers]
    notes = list(rows_to_mod_notes(rows, volumes))

    percussive = set()
    for sample, props in sample_props(mod, notes).items():
        if props.is_percussive:
            percussive.add(sample)
    return percussive
def notes_to_midi_file(notes, midi_file, midi_mapping):
    """Convert module notes to MIDI notes and save them as a type 1 file.

    Args:
        notes: Notes exposing a ``col_idx`` attribute, used for grouping.
        midi_file: Destination handed to ``MidiFile.save``.
        midi_mapping: Dict from sample index to a MIDI definition; it is
            logged and passed on to ``mod_notes_to_midi_notes``.
    """
    # Log the sample-to-MIDI mapping as a table.
    SP.header('MIDI MAPPING', '%d samples', len(midi_mapping))
    SP.print('sample midi base dur vol')
    row_fmt = '%6d %4d %4d %3d %3.2f'
    for sample_idx, midi_def in midi_mapping.items():
        SP.print(row_fmt, (sample_idx, *midi_def))
    SP.leave()

    # Convert the notes one column at a time.
    midi_notes_per_column = []
    for _, column_group in sort_groupby(notes, lambda n: n.col_idx):
        column_notes = list(column_group)
        midi_notes_per_column.append(
            list(mod_notes_to_midi_notes(column_notes, midi_mapping)))

    midi_notes = sorted(flatten(midi_notes_per_column))
    SP.print('Produced %d midi notes (on/offs).' % len(midi_notes))

    # Group by the first element of each midi note (9 for drums) and
    # build one track per group.
    tracks = []
    for channel, channel_notes in groupby(midi_notes, lambda el: el[0]):
        events = list(midi_notes_to_track(channel, channel_notes))
        tracks.append(MidiTrack(events))

    midi = MidiFile(type=1)
    midi.tracks = tracks
    midi.save(midi_file)
def load_data_from_disk(corpus_path, kb_limit):
    """Load the accepted patterns of the corpus as an ``int8`` array.

    Only index entries with four channels, format ``'MOD'`` and a
    ``kb_size`` of at most *kb_limit* are read. Their patterns are
    gathered and those rejected by ``good_pattern`` are dropped.

    Args:
        corpus_path: Corpus root; joined with each entry's genre and
            file name using ``/``.
        kb_limit: Largest ``kb_size`` an index entry may have.

    Returns:
        ``np.ndarray`` with dtype ``np.int8`` built from the kept patterns.
    """
    def is_wanted(mod):
        return (mod.n_channels == 4
                and mod.format == 'MOD'
                and mod.kb_size <= kb_limit)

    index = load_index(corpus_path)
    wanted = list(filter(is_wanted, index.values()))
    file_paths = [corpus_path / mod.genre / mod.fname for mod in wanted]

    per_file = list(map(mod_file_to_patterns, file_paths))
    kept = [pat for pat in flatten(per_file) if good_pattern(pat)]
    return np.array(kept, dtype=np.int8)
def rows_to_mod_notes(rows, volumes):
    """Return the flattened notes of column indices 0 through 3 of *rows*.

    Each column is converted with ``column_to_mod_notes`` and the
    per-column results are combined with ``flatten`` in column order.
    """
    per_column = []
    for col_idx in range(4):
        per_column.append(column_to_mod_notes(rows, col_idx, volumes))
    return flatten(per_column)
def upload_caches(connection, corpus_path):
    """Upload the corpus cache files and the corpus index.

    Every ``*.pickle`` and ``*.npy`` file directly inside *corpus_path*,
    followed by ``corpus_path / 'index'``, is passed to ``upload_files``
    paired with its bare file name.
    """
    globs = []
    for ext in ('pickle', 'npy'):
        globs.append(corpus_path.glob(f'*.{ext}'))
    cache_paths = flatten(globs) + [corpus_path / 'index']
    upload_files(connection, [(path, path.name) for path in cache_paths])
def upload_code(connection, sftp):
    """Upload the ``*.py`` files of the ``musicgen`` and ``tools`` directories.

    Both directories are first passed to ``remote_mkdir_safe``; then each
    source file is paired with its directory and the pairs are passed to
    ``upload_files``.
    """
    code_dirs = [Path('musicgen'), Path('tools')]
    for code_dir in code_dirs:
        remote_mkdir_safe(sftp, code_dir)

    per_dir = []
    for code_dir in code_dirs:
        per_dir.append([(src, code_dir) for src in code_dir.glob('*.py')])
    upload_files(connection, flatten(per_dir))
def main():
    """Command-line entry point: extract melodies from a module and save them.

    Options are parsed with docopt from the module docstring. The input
    module is loaded and linearized, candidate note groups are extracted
    from column indices 0 through 3, and those accepted by ``is_melody``
    are kept. Unless ``--no-transpose`` is given they are passed through
    ``move_to_c``; duplicates and ending silence are then removed and a
    trailer is added. The result is saved as a new module in which the
    melody cells fill the first column and the other three columns hold
    ``ZERO_CELL``.

    Exits with status 1 when no melody is found.
    """
    import sys

    args = docopt(__doc__, version='MOD Melody Extractor 1.0')

    # Argument parsing
    SP.enabled = args['--verbose']
    input_file = args['<input-mod>']
    output_file = args['<output-mod>']
    max_distance = int(args['--max-distance'])
    min_length = int(args['--min-length'])
    min_unique = int(args['--min-unique'])
    max_repeat = int(args['--max-repeat'])
    mu_factor = float(args['--mu-factor'])
    mu_threshold = int(args['--mu-threshold'])
    trailer = int(args['--trailer'])
    transpose = not args['--no-transpose']

    # Load mod
    mod = load_file(input_file)
    rows = linearize_rows(mod)

    # Extract and filter melodies
    melodies = flatten(
        extract_sample_groups(rows, col_idx,
                              max_distance, mu_factor, mu_threshold)
        for col_idx in range(4))
    melodies = [
        melody for (melody, msg) in melodies
        if is_melody(melody, min_length, min_unique, max_repeat)
    ]
    if transpose:
        melodies = [move_to_c(melody) for melody in melodies]
    melodies = [
        remove_ending_silence(melody)
        for melody in filter_duplicate_melodies(melodies)
    ]

    SP.header('%d MELODIES' % len(melodies))
    for melody in melodies:
        for cell in melody:
            SP.print(cell_to_string(cell))
        SP.print('')
    SP.leave()

    melodies = [add_trailer(melody, trailer) for melody in melodies]

    if not melodies:
        # The options are read by key throughout this function, so the
        # input path comes from the '<input-mod>' value read above rather
        # than from attribute access on ``args``.
        fmt = 'Sorry, found no melodies in "%s"!'
        print(fmt % (input_file,))
        # sys.exit is used because the bare ``exit`` helper is only
        # installed by the ``site`` module.
        sys.exit(1)

    # One melody cell per row in the first column; other columns are empty.
    cells = flatten(melodies)
    rows = [[c, ZERO_CELL, ZERO_CELL, ZERO_CELL] for c in cells]
    patterns = list(rows_to_patterns(rows))
    n_patterns = len(patterns)

    # The pattern table is padded with zeros up to 128 entries.
    pattern_table = list(range(n_patterns)) + [0] * (128 - n_patterns)

    mod_out = dict(title=mod.title,
                   sample_headers=mod.sample_headers,
                   n_orders=n_patterns,
                   restart_pos=0,
                   pattern_table=bytearray(pattern_table),
                   initials='M.K.'.encode('utf-8'),
                   patterns=patterns,
                   samples=mod.samples)
    save_file(output_file, mod_out)