# NOTE(review): this chunk was flattened onto one line; indentation below is
# reconstructed from syntax. The `parser` object and the '-i'/'--infolder'
# argument are defined above this view; the body of explode() is cut off
# mid-expression at the end (`book_end = len(`).
parser.add_argument('-o', '--outfolder', type=str, help='The output folder for sliced audio')
args = parser.parse_args()
# Both folders are required; bail out early if either is missing.
if not args.outfolder or not args.infolder:
    print('Please provide a valid input/output folder!')
    exit()
# Prepare for work
tmp_dir = tempfile.mkdtemp()  # scratch space for the per-file slice indices
input_folder = args.infolder
input_files = os.listdir(input_folder)
output_folder = args.outfolder
check_make(output_folder)  # presumably creates the folder if absent — project helper

def explode(idx: int):
    # Slice one input file (selected by index so this can be mapped over a pool).
    ## NoveltySlice on File
    novelty_src = os.path.join(input_folder, input_files[idx])
    # fluid-noveltyslice writes the detected slice points as a wav buffer here.
    novelty_indices = os.path.join(tmp_dir, f'{input_files[idx]}_slices.wav')
    subprocess.call([
        'fluid-noveltyslice',
        '-source', novelty_src,
        '-indices', novelty_indices,
        '-fftsettings', '2048', '1024', '2048',
        '-threshold', '0.61',
        '-kernelsize', '3',
        '-filtersize', '1'
    ])
    # Turn slices wav into a list
    # bufspill is a project helper — presumably loads a wav buffer into an
    # array-like; TODO confirm its return type.
    data = bufspill(novelty_indices)
    sound = bufspill(novelty_src)
    # NOTE(review): truncated in this view — the call to len( is unfinished.
    book_end = len(
# Make paths/dirs x = x.replace(' ', '_') # get rid of dots at the start of a file if x[0] == '.': x = x[1:] full_path = os.path.join(root, x) raw_path = os.path.join(tmp_dir, f'{x}.raw') audio_path = os.path.join(output_directory, f'{x}.wav') # Do checks if current_size < MB_LIM: if check_size(full_path, 30000) and check_ext( full_path, BAD_EXTS): copyfile(full_path, raw_path) current_size += bytes_to_mb(os.path.getsize(raw_path)) if current_size < MB_LIM: convert(infile=raw_path, outfile=audio_path, encoding=args.encoding, bits=args.bits, channels=args.channels) tmp_dir = tempfile.mkdtemp() origin_dir = args.infolder audio_dir = os.path.join(project_root, args.outfolder) check_make(audio_dir) scrape(origin_dir, audio_dir) rmtree(tmp_dir) printp('Completed Scraping')
# NOTE(review): chunk was flattened onto one line; formatting reconstructed.
# The `with mp.Pool()` statement at the end is truncated (its body is outside
# this view).
import subprocess as sp
import multiprocessing as mp
import os
import sys
import time
from datamosh.utils import check_make, wipe_dir
from datamosh.variables import unique_audio_folder, unique_audio_files, project_root, essentia_analysis

num_jobs = len(unique_audio_files)  # one extractor job per unique audio file
simple_analysis_folder = os.path.join(essentia_analysis, 'simple_extractor_segmented')
check_make(simple_analysis_folder)
wipe_dir(simple_analysis_folder)  # start from a clean output folder each run

def process(idx):
    # Run the segmented essentia extractor on the idx-th unique audio file,
    # writing a JSON analysis alongside. Skips files already analysed —
    # note this skip is redundant right after wipe_dir() empties the folder,
    # but presumably matters when the wipe is disabled; TODO confirm.
    input_file = os.path.join(unique_audio_folder, unique_audio_files[idx])
    output_file = os.path.join(simple_analysis_folder, f'{unique_audio_files[idx]}.json')
    if not os.path.isfile(output_file):
        sp.call([
            'essentia_databend_simple_extractor',
            input_file,
            output_file,
            '8192',  # frame size — TODO confirm against extractor docs
            '256',   # hop size — TODO confirm against extractor docs
        ])

# NOTE(review): truncated here — the pool body is outside this view.
with mp.Pool() as pool:
# NOTE(review): this chunk was flattened onto one line; formatting restored.
# Reads a YAML run-configuration (path given as argv[1]), prepares an output
# folder named '<algorithm>_<identifier>', archives the config next to the
# results, loads the input feature JSON, and selects a scaler for the
# requested normalisation. Continues past this view (the chosen `scaler`
# is presumably applied below).
this_script = os.getcwd()

# Configuration
printp('Reading configuration')
cfg_path = os.path.join(this_script, sys.argv[1])
cfg = read_yaml(cfg_path)
json_out = cfg['json']
input_data = cfg['input_data']
algorithm = cfg['algorithm']
normalisation = cfg['normalisation']
identifier = cfg['identifier']

folder_name = f'{algorithm}_{identifier}'
output_path = os.path.join(this_script, 'outputs', folder_name)
check_make(output_path)
# Keep a copy of the exact configuration with the results for reproducibility.
copyfile(cfg_path, os.path.join(output_path, 'configuration.yaml'))

printp('Reading in data')
feature = read_json(os.path.join(project_root, 'python_scripts',
                                 'dimensionality_reduction', 'outputs',
                                 input_data))
# keys/values stay index-aligned: keys[i] labels data[i].
keys = list(feature.keys())
values = list(feature.values())
data = np.array(values)

printp('Normalising')
if normalisation != 'none':
    # The two options are mutually exclusive, so chain them with elif;
    # any other value leaves `scaler` unbound, exactly as before.
    if normalisation == 'minmax':
        scaler = MinMaxScaler()
    elif normalisation == 'standardise':
        scaler = StandardScaler()
# Remove DC offset from every unique audio file by running each through a
# sox 10 Hz highpass filter, in parallel, writing the filtered copies to
# <project_root>/DataAudioUnique_DC. Progress is reported on stderr.
import subprocess as sp
import os
import sys
import multiprocessing as mp

from datamosh.variables import unique_audio_folder, unique_audio_files, project_root
from datamosh.utils import check_make

filtered_folder = os.path.join(project_root, 'DataAudioUnique_DC')
check_make(filtered_folder)

num_jobs = len(unique_audio_files)  # one sox invocation per unique file


def remove_dc(infile: str, outfile: str, hertz: str):
    """Write a DC-filtered copy of *infile* to *outfile*.

    Applies a sox highpass at *hertz* Hz (sox takes the cutoff as a string
    argument). BUG FIX: the original used sp.Popen() and never waited on
    the child, so the pool marked jobs "done" while sox was still running
    and children could be orphaned at script exit; sp.call() blocks until
    sox finishes, matching the sibling extractor script's pattern.
    """
    sp.call(['sox', '-D', '-V0', infile, outfile, 'highpass', hertz])


def process(idx: int):
    """Filter the idx-th unique audio file (index keeps the job picklable)."""
    # 10 Hz cutoff: removes DC/subsonic offset without touching audible content.
    remove_dc(os.path.join(unique_audio_folder, unique_audio_files[idx]),
              os.path.join(filtered_folder, unique_audio_files[idx]),
              '10')


with mp.Pool() as pool:
    # imap_unordered yields as jobs complete, so the percentage is live.
    for i, _ in enumerate(pool.imap_unordered(process, range(num_jobs)), 1):
        sys.stderr.write('\rdone {0:%}'.format(i / num_jobs))