Example #1
def _vocal_separation(wav_list, out_folder):
    wavs = OrderedDict({os.path.basename(wav): wav for wav in wav_list})
    if os.path.exists(out_folder):
        # Some audio files have already been separated.
        sep_wavs = set(os.listdir(out_folder))
        diff_wavs = set(wavs.keys()) - sep_wavs
        logger.debug("Audio to be separated: %s", diff_wavs)

        # Drop the already-separated audio from the list to process.
        done_wavs = set(wavs.keys()) - diff_wavs
        wavs_copy = wavs.copy()
        for dwav in done_wavs:
            del wavs_copy[dwav]
        wav_list = list(wavs_copy.values())

    out_list = [jpath(out_folder, wav) for wav in wavs]
    if len(wav_list) > 0:
        separator = Separator('spleeter:2stems')
        separator._params["stft_backend"] = "librosa"  # pylint: disable=protected-access
        for idx, wav_path in enumerate(wav_list, 1):
            logger.info("Separation Progress: %d/%d - %s", idx, len(wav_list),
                        wav_path)
            separator.separate_to_file(wav_path, out_folder)

            # The separated tracks are stored in sub-folders.
            # Move the vocal track to the desired folder and rename them.
            fname, _ = os.path.splitext(os.path.basename(wav_path))
            sep_folder = jpath(out_folder, fname)
            vocal_track = jpath(sep_folder, "vocals.wav")
            shutil.move(vocal_track, jpath(out_folder, fname + ".wav"))
            shutil.rmtree(sep_folder)
    return out_list
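Every example on this page relies on `jpath`, which these projects define as an alias of `os.path.join`. A minimal sketch of the imports the function above assumes (the `logger` name and the spleeter dependency come from the surrounding module, which is not shown here):

import os
import shutil
import logging
from collections import OrderedDict
from os.path import join as jpath

from spleeter.separator import Separator  # third-party: pip install spleeter

logger = logging.getLogger(__name__)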
Example #2
def thread_scaling_test(dry_run, target_dir, exp_name, group='', param_l=None):
  from scripts.conf.conf import machine_info, machine_conf
  from scripts.pluto.pluto_utils import run_pluto_test, tee
  import itertools, os, pickle
  from os.path import join as jpath

  if param_l is None:  # param_l is used below as a dict of tuned parameters
    param_l = {}
  target_dir = jpath(os.path.abspath("."), target_dir)


  #update the pinning information to use all cores
  th_max = machine_info['n_cores']
  const_pin_args = machine_conf['pinning_args']


  # machine_info['hostname']=='IVB_10core'
  kernels_limits = {'3d25pt':1089, '3d7pt':1217, '3d25pt_var':577, '3d7pt_var':769}
  increment = 64

  if(machine_info['hostname']=='Haswell_18core'):
    kernels_limits = {'3d25pt':1281, '3d7pt':1409, '3d25pt_var':769, '3d7pt_var':897}
    increment = 128

  points = dict()
  points['3d7pt']      = [896]
  points['3d25pt']     = [896]
  points['3d7pt_var']  = [768]
  points['3d25pt_var'] = [768]

  count=0
  for kernel in ['3d7pt', '3d7pt_var', '3d25pt', '3d25pt_var']:
    for N in points[kernel]:
      # get the tuned parameters
      if(dry_run==1):
        nt=32; param=[-1,-1,-1]
      if (kernel, N, 'MEM') in param_l: # use the tuned params of the memory results
        param, nt = param_l[(kernel, N, 'MEM')]
        nt_r = nt*2
      else:
        print("Tuning required for stencil:%s N:%d" % (kernel, N))
        continue

      for th in list(range(1,1+th_max)):
        outfile=('pluto_kernel_%s_N%d_th%d_%s_%s.txt' % (kernel, N, th, group, exp_name[-13:]))
        outfile = jpath(target_dir, outfile)
        machine_conf['pinning_args'] =  const_pin_args + str(th-1)

        nt = max(30, int(nt_r*float(th)/float(th_max)) )

        if(dry_run==0):
          fp = open(outfile, 'w')
          tee(fp, outfile)
#        print outfile, param
        test_str, telapsed = run_pluto_test(dry_run=dry_run, kernel=kernel, nx=N, ny=N, nz=N, nt=nt, params=param, outfile=outfile)
        if(dry_run==0):
          tee(fp, test_str)
          fp.close()
        count = count+1
  return count
Example #3
def igs_test(dry_run, target_dir, exp_name, group='', param_l=None):
  from scripts.conf.conf import machine_info
  # tee() is used below; pluto_tuner() is assumed to be importable from the
  # project's auto-tuning module.
  from scripts.pluto.pluto_utils import run_pluto_test, tee
  import itertools, os, pickle
  from os.path import join as jpath

  if param_l is None:
    param_l = {}
  target_dir = jpath(os.path.abspath("."), target_dir)


  # machine_info['hostname']=='IVB_10core'
  kernels_limits = {'3d25pt':1089, '3d7pt':1217, '3d25pt_var':577, '3d7pt_var':769}
  increment = 64

  if(machine_info['hostname']=='Haswell_18core'):
    kernels_limits = {'3d25pt':1281, '3d7pt':1409, '3d25pt_var':769, '3d7pt_var':897}
    increment = 128

  points = dict()
  points['3d7pt'] = [64] + list(range(128, 5000, increment))
  points['3d7pt_var'] = points['3d7pt']
  points['3d25pt'] = points['3d7pt']
  points['3d25pt_var'] = points['3d7pt']

  count=0
  #for kernel in ['3d7pt', '3d7pt_var', '3d25pt']:#, '3d25pt_var']:
  for kernel in [ '3d25pt', '3d25pt_var']:
    for N in points[kernel]:
      if (N < kernels_limits[kernel]):
        outfile=('pluto_kernel_%s_N%d_%s_%s.txt' % (kernel, N, group, exp_name[-13:]))
        outfile = jpath(target_dir, outfile)
        if(dry_run==1):
          nt=32; param=[-1,-1,-1]
#        nt = max(int(k_time_scale[kernel]/(N**3/1e6)), 30)
        if (kernel, N, group) in param_l:
          continue # results already exist for this test case

        if (kernel, N, 'MEM') in param_l: # use the tuned params of the memory results
          if(dry_run==0): fp = open(outfile, 'w')
          param, nt = param_l[(kernel, N, 'MEM')]
          nt = nt*2
        else:
#          continue
          if(dry_run==0):
            fp = open(outfile, 'w')
            param, nt, tune_res = pluto_tuner(kernel=kernel, nx=N, ny=N, nz=N, fp=fp)
            with open(outfile[:-3]+'p', 'wb') as fpickle:  # pickle requires binary mode in Python 3
              pickle.dump(tune_res, fpickle)
        if(dry_run==0): tee(fp, outfile)
#        print outfile, param
        test_str, telapsed = run_pluto_test(dry_run=dry_run, kernel=kernel, nx=N, ny=N, nz=N, nt=nt, params=param, outfile=outfile)
        if(dry_run==0):
          tee(fp, test_str)
          fp.close()
        count = count+1
  return count
Example #4
    def _resolve_feature_output_path(self, dataset_path, settings):  # pylint: disable=R0201
        if settings.dataset.feature_save_path == "+":
            base_output_path = dataset_path
            settings.dataset.save_path = dataset_path
        else:
            base_output_path = settings.dataset.feature_save_path
        train_feat_out_path = jpath(base_output_path, "train_feature")
        test_feat_out_path = jpath(base_output_path, "test_feature")
        ensure_path_exists(train_feat_out_path)
        ensure_path_exists(test_feat_out_path)
        return train_feat_out_path, test_feat_out_path
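A hypothetical call illustrating the "+" convention above; the app instance and the dataset path are made up for this sketch:

settings.dataset.feature_save_path = "+"
train_out, test_out = app._resolve_feature_output_path("/data/musicnet", settings)
# train_out == "/data/musicnet/train_feature"
# test_out  == "/data/musicnet/test_feature"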
Example #5
File: resources.py Project: macskay/zkit
def load():
    pygame.font.init()

    logger.info("loading")
    from zkit import config

    global resource_path
    global sounds, images, music, fonts, maps, tiles
    global border, border_path

    resource_path = abspath(jpath(dirname(__file__), 'data'))
    border_path = jpath(resource_path, 'dialog.png')
    sounds_path = jpath(resource_path, 'sounds', '*')

    # load the tiles
    tile_path = jpath(resource_path, 'tiles', '*.png')
    for filename in glob.glob(tile_path):
        # glob already yields the full path to each tile
        path = filename
        image = pygame.image.load(path).convert_alpha()
        tiles[basename(filename)] = image
        yield path, image

    for name, filename in config.items('font-files'):
        path = jpath(resource_path, 'fonts', filename)
        fonts[name] = path
        yield path, path

    vol = config.getint('sound', 'sound-volume') / 100.
    for filename in glob.glob(sounds_path):
        logger.info("loading %s", filename)
        try:
            if isfile(filename):
                sound = pygame.mixer.Sound(filename)
                sound.set_volume(vol)
                sounds[basename(filename)] = sound
                yield filename, sound
        except pygame.error:
            pass

    for name, filename in config.items('image-files'):
        path = jpath(resource_path, 'images', filename)
        logger.info("loading %s", path)
        image = pygame.image.load(path)
        images[name] = image
        yield path, image

    for name, filename in config.items('map-files'):
        path = jpath(resource_path, 'maps', filename)
        logger.info("loading %s", path)
        maps[name] = path
        yield path, path

    for name, filename in config.items('music-files'):
        path = jpath(resource_path, 'music', filename)
        logger.info("loading %s", path)
        music[name] = path
        yield path, path
Example #6
    def generate_feature(self, dataset_path, beat_settings=None, num_threads=8):
        """Extract the feature from the given dataset.

        To train the model, the first step is to pre-process the data into feature
        representations. After downloading the dataset, use this function to generate
        the features by passing in the path of the stored dataset.

        To specify the output path, modify the attribute
        ``beat_settings.dataset.feature_save_path``.
        It defaults to the folder where the dataset is stored, generating
        two sub-folders: ``train_feature`` and ``test_feature``.

        Parameters
        ----------
        dataset_path: Path
            Path to the downloaded dataset.
        beat_settings: BeatSettings
            The configuration instance that holds all relevant settings for
            the life-cycle of building a model.
        num_threads:
            Number of threads for extracting the features in parallel.
        """
        settings = self._validate_and_get_settings(beat_settings)

        # Resolve feature output path
        train_feat_out_path, test_feat_out_path = self._resolve_feature_output_path(dataset_path, settings)
        logger.info("Output training feature to %s", train_feat_out_path)
        logger.info("Output testing feature to %s", test_feat_out_path)

        train_labels = MusicNetStructure.get_train_labels(dataset_path)
        test_labels = MusicNetStructure.get_test_labels(dataset_path)

        logger.info(
            "Start extracting the training features of the dataset. "
            "This may take a while and affect the computer's performance."
        )
        _parallel_feature_extraction(train_labels, train_feat_out_path, settings.feature, num_threads=num_threads)

        logger.info(
            "Start extracting the testing features of the dataset. "
            "This may take a while and affect the computer's performance."
        )
        _parallel_feature_extraction(test_labels, test_feat_out_path, settings.feature, num_threads=num_threads)

        # Writing out the settings
        write_yaml(settings.to_json(), jpath(train_feat_out_path, ".success.yaml"))
        write_yaml(settings.to_json(), jpath(test_feat_out_path, ".success.yaml"))
        logger.info("All done")
Example #7
def _parallel_feature_extraction(wav_paths,
                                 label_paths,
                                 out_path,
                                 feat_settings,
                                 num_threads=3):
    label_path_mapping = _gen_wav_label_path_mapping(label_paths)
    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           wav_paths,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           label_path_mapping=label_path_mapping,
                           feat_settings=feat_settings))
    for idx, ((patch_cqt, m_beat_arr, label_128, label_13),
              audio_idx) in iters:
        audio = wav_paths[audio_idx]
        # print(f"Progress: {idx+1}/{len(wav_paths)} - {audio}" + " "*6, end="\r")  # noqa: E226
        logger.info("Progress: %d/%d - %s", idx + 1, len(wav_paths),
                    audio)  # noqa: E226

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature",
                                         data=patch_cqt,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("label",
                                         data=label_13,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("label_128",
                                         data=label_128,
                                         compression="gzip",
                                         compression_opts=3)
                    out_f.create_dataset("cqt_mini_beat_arr",
                                         data=m_beat_arr,
                                         compression="gzip",
                                         compression_opts=3)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
                time.sleep(0.5 * 2**retry)
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError(f"Failed to write {out_hdf}")
    print("")
Example #8
    def load(self):
        self.fp.seek(0)
        self.clock = pygame.time.Clock()
        for line in self.fp.readlines():
            if line.split()[0] == 'name':
                self.name = line.split('name', 1)[1].strip('\n')
                if self.name[0] == ' ':
                    self.name = self.name[1:]
                self.n = '[' + self.name + ']'
                print(self.n + 'Loading up.')
            if line.split()[0] == 'caption':
                pygame.display.set_caption(
                    line.split('caption', 1)[1].strip('\n')[1:])
            if line.split()[0] == 'codefile':
                code_path_orig = line.split('codefile', 1)[1].strip('\n')[1:]
                code_path = code_path_orig + '.py'
                self.game_code = imp.load_source(
                    code_path_orig, jpath('..', 'game', code_path))
            if line.split()[0] == 'mapfiles':
                lineremainder = line.split('mapfiles', 1)[1]
                self.maparray = lineremainder[1:].strip('\n').split()
            if line.split()[0] == 'initial_map':
                lineremainder = line.split('initial_map', 1)[1]
                self.initial_map = lineremainder[1:].strip('\n')
        self.fp.seek(0)
        self.camera_pos = (0, 0)
        print(self.n + 'Done.')
Example #10
    def generate_feature(self,
                         dataset_path,
                         chord_settings=None,
                         num_threads=4):
        """Extract feature of McGill BillBoard dataset.

        There are three main features that will be used in the training:

        * chroma: input feature of the NN model
        * chord: the first type of the ground-truth
        * chord_change: the second type of the ground-truth

        The last two feature will be both used for computing the training loss.
        During the feature extraction, the feature data is stored as a numpy array
        with named field, makes it works like a dict type.
        """
        settings = self._validate_and_get_settings(chord_settings)

        # Resolve feature output path
        train_feat_out_path, test_feat_out_path = self._resolve_feature_output_path(
            dataset_path, settings)
        logger.info("Output training feature to %s", train_feat_out_path)
        logger.info("Output testing feature to %s", test_feat_out_path)

        train_data_pair = McGillBillBoard.get_train_data_pair(dataset_path)
        test_data_pair = McGillBillBoard.get_test_data_pair(dataset_path)
        logger.info("Total number of training data: %d", len(train_data_pair))
        logger.info("Total number of testing data: %d", len(test_data_pair))

        # Start feature extraction
        logger.info("Start to extract training feature")
        _parallel_feature_extraction(train_data_pair,
                                     train_feat_out_path,
                                     num_threads=num_threads)

        logger.info("Start to extract testing feature")
        _parallel_feature_extraction(test_data_pair,
                                     test_feat_out_path,
                                     num_threads=num_threads)

        # Writing out the settings
        write_yaml(settings.to_json(),
                   jpath(train_feat_out_path, ".success.yaml"))
        write_yaml(settings.to_json(),
                   jpath(test_feat_out_path, ".success.yaml"))
        logger.info("All done")
Example #11
    def _output_midi(self, output, input_audio, midi=None, verbose=True):
        if output is None:
            return None

        if os.path.isdir(output):
            output = jpath(output, get_filename(input_audio))
        if midi is not None:
            out_path = output if output.endswith(".mid") else f"{output}.mid"
            midi.write(out_path)
            if verbose:
                logger.info("MIDI file has been written to %s.", out_path)
        return output
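Two hypothetical calls showing how the helper above resolves the output location; midi_obj stands for any object with a write() method, such as a pretty_midi.PrettyMIDI instance:

self._output_midi("./results", "song.wav", midi=midi_obj)        # writes ./results/song.mid
self._output_midi("./results/take1", "song.wav", midi=midi_obj)  # writes ./results/take1.mid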
Example #12
def _parallel_feature_extraction(audio_list,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
        "harmonic_num": feat_settings.harmonic_number
    }

    iters = enumerate(
        parallel_generator(extract_cfp_feature,
                           audio_list,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           harmonic=feat_settings.harmonic,
                           **feat_extract_params))
    for idx, (feature, audio_idx) in iters:
        audio = audio_list[audio_idx]
        # logger.info("Progress: %s/%s - %s", idx+1, len(audio_list), audio)
        print(f"Progress: {idx+1}/{len(audio_list)} - {audio}" + " " * 6,
              end="\r")  # noqa: E226

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")

        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError(f"Failed to write {out_hdf}")
    print("")
Example #13
def _parallel_feature_extraction(data_pair, out_path, num_threads=4):
    iters = enumerate(
        parallel_generator(_extract_feature_arg_wrapper,
                           data_pair,
                           max_workers=num_threads,
                           chunk_size=num_threads))
    for idx, (feature, feat_idx) in iters:
        f_name = os.path.dirname(data_pair[feat_idx][0])

        # logger.info("Progress: %d/%d - %s", idx + 1, len(data_pair), f_name)
        print(f"Progress: {idx+1}/{len(data_pair)} - {f_name}", end="\r")
        out_hdf = jpath(out_path, os.path.basename(f_name) + ".hdf")
        _write_feature(feature, out_path=out_hdf)
Example #14
File: app.py Project: ykhorzon/omnizart
def _parallel_feature_extraction(data_pair,
                                 out_path,
                                 label_extractor,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "down_fs": feat_settings.sampling_rate,
        "win_size": feat_settings.window_size
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           label_extractor=label_extractor,
                           t_unit=feat_settings.hop_size,
                           **feat_extract_params))

    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]

        print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " " * 6,
              end="\r")  # noqa: E226

        filename, _ = os.path.splitext(os.path.basename(audio))
        out_hdf = jpath(out_path, filename + ".hdf")
        saved = False
        retry_times = 5
        for retry in range(retry_times):
            if saved:
                break
            try:
                with h5py.File(out_hdf, "w") as out_f:
                    out_f.create_dataset("feature", data=feature)
                    out_f.create_dataset("label", data=label)
                    saved = True
            except OSError as exp:
                logger.warning(
                    "OSError occurred, retrying %d times. Reason: %s",
                    retry + 1, str(exp))
        if not saved:
            logger.error(
                "H5py failed to save the feature file after %d retries.",
                retry_times)
            raise OSError(f"Failed to write {out_hdf}")
    print("")
Example #15
def _parallel_feature_extraction_v2(data_pair,
                                    out_path,
                                    feat_settings,
                                    num_threads=5):
    # Ceiling division: number of chunks of size num_threads.
    iter_num = -(-len(data_pair) // num_threads)

    for iter_idx in range(iter_num):
        loop = asyncio.get_event_loop()
        tasks = []
        for chunk in range(num_threads):
            wav_idx = num_threads * iter_idx + chunk  # noqa: E226
            if wav_idx >= len(data_pair):
                break
            logger.info("%s/%s - %s", wav_idx + 1, len(data_pair),
                        data_pair[wav_idx][0])  # noqa: E226
            tasks.append(
                loop.create_task(
                    _async_all_in_one_extract(data_pair[wav_idx][0],
                                              data_pair[wav_idx][1],
                                              feat_settings)))

        group = asyncio.gather(*tasks, return_exceptions=True)
        print("Waiting...")
        results = loop.run_until_complete(group)
        for result in results:
            patch_cqt, m_beat_arr, label_128, label_13, wav_path = result
            basename = os.path.basename(wav_path)
            filename, _ = os.path.splitext(basename)
            out_hdf = jpath(out_path, filename + ".hdf")
            with h5py.File(out_hdf, "w") as out_f:
                out_f.create_dataset("feature",
                                     data=patch_cqt,
                                     compression="gzip",
                                     compression_opts=3)
                out_f.create_dataset("label",
                                     data=label_13,
                                     compression="gzip",
                                     compression_opts=3)
                out_f.create_dataset("label_128",
                                     data=label_128,
                                     compression="gzip",
                                     compression_opts=3)
                out_f.create_dataset("mini_beat_arr",
                                     data=m_beat_arr,
                                     compression="gzip",
                                     compression_opts=3)
Example #16
def _parallel_feature_extraction(data_pair,
                                 label_extractor,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_extract_params = {
        "hop": feat_settings.hop_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair,
                           max_workers=num_threads,
                           chunk_size=num_threads,
                           label_extractor=label_extractor,
                           t_unit=feat_settings.hop_size,
                           **feat_extract_params))
    for idx, ((feature, label), audio_idx) in iters:
        audio = data_pair[audio_idx][0]
        logger.info("Progress: %s/%s - %s", idx + 1, len(data_pair), audio)
        # print(f"Progress: {idx+1}/{len(data_pair)} - {audio}" + " "*6, end="\r")  # noqa: E226

        # Trim to the same length
        max_len = min(len(feature), len(label))
        feature = feature[:max_len]
        label = label[:max_len]

        basename = os.path.basename(audio)
        filename, _ = os.path.splitext(basename)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature",
                                 data=feature,
                                 compression="gzip",
                                 compression_opts=3)
            out_f.create_dataset("label",
                                 data=label,
                                 compression="gzip",
                                 compression_opts=3)
Example #17
File: pluto_utils.py Project: ecrc/girih
def run_pluto_test(dry_run, kernel, nx, ny, nz, nt, params, outfile='', pinning_cmd=-1, pinning_args=-1, auto_tuning=0):
  import os, subprocess
  from os.path import join as jpath
  from string import Template
  from scripts.conf.conf import machine_conf, machine_info
  import time

  if(pinning_cmd==-1): pinning_cmd = machine_conf['pinning_cmd']
  if(pinning_args==-1): pinning_args = machine_conf['pinning_args']

  job_template=Template(
"""$pinning_cmd $pinning_args $exec_path $nx $ny $nz $nt $outfile""")

  if(outfile!=''):
    outfile = ' | tee -a ' + outfile

  # set the executable
  exec_name = 'lbpar_' + kernel
  #add the tile size parameters to the executable name
  exec_dir = exec_name + "%d_%d_%d_%d"%(params[0], params[0], params[1], params[2])

  exec_path = jpath(os.path.abspath("."),'pluto_examples', 'gen_kernels', exec_dir,  exec_name)


  job_cmd = job_template.substitute(nx=nx, ny=ny, nz=nz, nt=nt, kernel=kernel,
                       outfile=outfile, exec_path=exec_path, pinning_cmd=pinning_cmd,
                       pinning_args=pinning_args)

  tstart = time.time()
  test_str=''
  if(auto_tuning):
    proc = subprocess.Popen(job_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    test_str = proc.stdout.read().decode()
  else:
    print(job_cmd)
    test_str = job_cmd + '\n'
    if(dry_run==0):
      proc = subprocess.Popen(job_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      test_str = test_str + proc.stdout.read().decode()
  tend = time.time()
  return test_str, (tend-tstart)
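A hypothetical dry-run invocation of the helper above; with dry_run=1 and auto_tuning=0 the assembled command line is printed and returned but never executed:

test_str, elapsed = run_pluto_test(dry_run=1, kernel='3d7pt',
                                   nx=960, ny=960, nz=960, nt=100,
                                   params=[32, 8, 8])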
Example #18
    def _get_train_test_split_ids(cls, dataset_path):
        """Get train/test set split indexes.

        Default will use the folder ID as the partition base.
        Folders with an index smaller than `train_test_split_id` are taken as the
        training set, and the rest as the testing set.

        Returns
        -------
        train_ids: list[str]
            Folder ids of training set.
        test_ids: list[str]
            Folder ids of testing set.
        """
        index_file_path = jpath(dataset_path, cls.index_file_path)
        name_id_mapping = {}
        with open(index_file_path, "r", newline="") as fin:
            reader = csv.DictReader(fin, delimiter=",")
            for data in reader:
                pid = int(data["id"])
                if data["title"] != "" and pid not in cls.ignore_ids:
                    name = data["artist"] + ": " + data["title"]
                    if name not in name_id_mapping:
                        name_id_mapping[name] = []
                    # Repetition count: 1->613, 2->110, 3->19
                    name_id_mapping[name].append(pid)

        train_ids, test_ids = [], []  # Folder ids
        for pids in name_id_mapping.values():
            if len(pids) <= 2:
                pid = pids[0]
            else:
                pid = pids[2]

            if pid <= cls.train_test_split_id:
                train_ids.append(str(pid).zfill(4))
            else:
                test_ids.append(str(pid).zfill(4))

        return train_ids, test_ids
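The reader above expects the dataset's index CSV to provide at least id, artist, and title columns. A made-up excerpt for illustration (rows with an empty title, or with an id listed in cls.ignore_ids, are skipped):

id,artist,title
3,James Brown,I Don't Mind
4,James Brown,
5,The Box Tops,The Letter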
Example #19
	def __init__(self):
		#self.PYGAME_KEY_CONVERSION_BASE={pygame.K_a:'a',pygame.K_w:'w',pygame.K_d='d'}
		self.PYGAME_KEY_CONVERSION_BASE={}
		self.flags = pygame.DOUBLEBUF | pygame.HWSURFACE
		self.fp=open(jpath("..","game",'game.tgf'),mode='r')
		self.fp.seek(0)
		for line in self.fp.readlines():
			if line.split()[0]=='resolution':
				#self.xres,self.yres=line.split('resolution', 1)[1].strip('\n').split()
				self.xres,self.yres=(1600,900)
		self.fp.seek(0)
		self.screen=pygame.display.set_mode((int(self.xres),int(self.yres)),self.flags)
		self.layer1 = pygame.sprite.Group()
		self.layer2 = pygame.sprite.Group()
		self.layer3 = pygame.sprite.Group()
		self.layer4 = pygame.sprite.Group()
		self.layer5 = pygame.sprite.Group()
		self.alpha_test_map=pygame.image.load('../media/alpha_darkness_1.png').convert_alpha()
		self.layer1_c = []
		self.layer2_c = []
		self.layer3_c = []
		self.protocol=None
		self.ents_by_id={}
		self.shaking=False
		self.shakes=0
		self.shakesize=0
		self.curr_id=0
		self.map_data=None
		self.layer4_c = []
		self.layer5_c = []
		self.locking=False
		self.reqs_update=[]
		pygame.mixer.pre_init(44100, 16, 6, 4096)
		pygame.mixer.init()
		print('[launcher]Mixer initialized.')
		pygame.init()
		self.xres=int(self.xres)
		self.yres=int(self.yres)
Example #20
File: app.py Project: ykhorzon/omnizart
def _parallel_feature_extraction(data_pair_list,
                                 out_path,
                                 feat_settings,
                                 num_threads=4):
    feat_params = {
        "patch_size": feat_settings.patch_size,
        "threshold": feat_settings.peak_threshold,
        "down_fs": feat_settings.sampling_rate,
        "hop": feat_settings.hop_size,
        "win_size": feat_settings.window_size,
        "fr": feat_settings.frequency_resolution,
        "fc": feat_settings.frequency_center,
        "tc": feat_settings.time_center,
        "g": feat_settings.gamma,
        "bin_per_octave": feat_settings.bins_per_octave,
    }

    iters = enumerate(
        parallel_generator(_all_in_one_extract,
                           data_pair_list,
                           max_workers=num_threads,
                           use_thread=True,
                           chunk_size=num_threads,
                           **feat_params))
    for idx, ((feat, mapping, zzz, label), audio_idx) in iters:
        audio = data_pair_list[audio_idx][0]

        # logger.info("Progress: %s/%s - %s", idx+1, len(data_pair_list), audio)
        print(f"Progress: {idx + 1}/{len(data_pair_list)} - {audio}", end="\r")

        filename = get_filename(audio)
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feat)
            out_f.create_dataset("mapping", data=mapping)
            out_f.create_dataset("Z", data=zzz)
            out_f.create_dataset("label", data=label)
    print("")
Example #21
    def __init__(self):
        self.fp = open(jpath("..", "game", 'game.tgf'), mode='r')
        self.fp.seek(0)
        screen = pygame.display.set_mode((800, 600))
        self.layer1 = pygame.sprite.Group()
        self.layer2 = pygame.sprite.Group()
        self.layer3 = pygame.sprite.Group()
        self.layer4 = pygame.sprite.Group()
        self.layer5 = pygame.sprite.Group()
        self.layer1_c = []
        self.layer2_c = []
        self.rem_index_curr = 0
        self.layer3_c = []
        self.clients = []
        self.map_data = None
        self.input = {}
        self.ents_by_id = {}
        self.curr_id = 0
        self.layer4_c = []
        self.layer5_c = []
        self.locking = False
        self.reqs_update = []
        pygame.init()
Example #23
def _parallel_feature_extraction(feat_list, out_path, feat_settings, num_threads=4):
    iters = enumerate(
        parallel_generator(
            _all_in_one_extract,
            feat_list,
            max_workers=num_threads,
            chunk_size=num_threads,
            t_unit=feat_settings.time_unit
        )
    )

    for idx, ((feature, beat_arr, down_beat_arr), feat_idx) in iters:
        feat = feat_list[feat_idx]

        print(f"Progress: {idx+1}/{len(feat_list)} - {feat}" + " "*6, end="\r")  # noqa: E226
        # logger.info("Progress: %s/%s - %s", idx+1, len(feat_list), feat)

        filename, _ = os.path.splitext(os.path.basename(feat))
        out_hdf = jpath(out_path, filename + ".hdf")
        with h5py.File(out_hdf, "w") as out_f:
            out_f.create_dataset("feature", data=feature)
            out_f.create_dataset("beat", data=beat_arr)
            out_f.create_dataset("down_beat", data=down_beat_arr)
    print("")
Example #24
def get_text(heading):
    fh = open(jpath(resource_path, 'dialogs.txt'))

    found = False
    while 1:
        line = fh.readline()
        if not line:
            # EOF: avoid looping forever when the heading is missing.
            break
        if line.strip().lower() == heading.lower():
            found = True
            break

    if not found:
        fh.close()
        raise ValueError('heading not found: {}'.format(heading))

    line = fh.readline()
    if not line.startswith('='):
        fh.close()
        raise ValueError('improperly formatted header: {}'.format(heading))

    while 1:
        line = fh.readline()
        if not line or line.strip().startswith("="):
            # Stop at EOF or at the closing marker of the section.
            break
        line = line.strip()
        if not line:
            continue
        yield line
    fh.close()
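A minimal sketch of the dialogs.txt layout the parser above appears to expect (inferred from the code, not from the project's documentation):

intro
================
Welcome, traveler.
Press any key to continue.
================

Reading a section then looks like:

for line in get_text('intro'):
    print(line)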
Example #25
    def train(self,
              feature_folder,
              model_name=None,
              input_model_path=None,
              music_settings=None):
        """Model training.

        Train the model from scratch or continue training given a model checkpoint.

        Parameters
        ----------
        feature_folder: Path
            Path to the generated feature.
        model_name: str
            The name of the trained model. If not given, will default to the
            current timestamp.
        input_model_path: Path
            Specify the path to the model checkpoint in order to fine-tune
            the model.
        music_settings: MusicSettings
            The configuration instance that holds all relevant settings for
            the life-cycle of model building.
        """
        settings = self._validate_and_get_settings(music_settings)

        if input_model_path is not None:
            logger.info("Continue to train on model: %s", input_model_path)
            model, prev_set = self._load_model(
                input_model_path, custom_objects=self.custom_objects)
            settings.training.timesteps = prev_set.training.timesteps
            settings.training.label_type = prev_set.training.label_type
            settings.training.channels = prev_set.training.channels
            settings.model.save_path = prev_set.model.save_path
            settings.transcription_mode = prev_set.transcription_mode

        logger.info("Using label type: %s", settings.training.label_type)
        l_type = LabelType(settings.training.label_type)
        settings.transcription_mode = self.label_trans_mode_mapping[
            settings.training.label_type]

        logger.info("Constructing dataset instance")
        split = settings.training.steps / (settings.training.steps +
                                           settings.training.val_steps)
        train_feat_files, val_feat_files = get_train_val_feat_file_list(
            feature_folder, split=split)

        output_types = (tf.float32, tf.float32)
        train_dataset = MusicDatasetLoader(
                l_type.get_conversion_func(),
                feature_files=train_feat_files,
                num_samples=settings.training.batch_size * settings.training.steps,
                timesteps=settings.training.timesteps,
                channels=[FEATURE_NAME_TO_NUMBER[ch_name] for ch_name in settings.training.channels],
                feature_num=settings.training.feature_num
            ) \
            .get_dataset(settings.training.batch_size, output_types=output_types)
        val_dataset = MusicDatasetLoader(
                l_type.get_conversion_func(),
                feature_files=val_feat_files,
                num_samples=settings.training.val_batch_size * settings.training.val_steps,
                timesteps=settings.training.timesteps,
                channels=[FEATURE_NAME_TO_NUMBER[ch_name] for ch_name in settings.training.channels],
                feature_num=settings.training.feature_num
            ) \
            .get_dataset(settings.training.val_batch_size, output_types=output_types)

        if input_model_path is None:
            logger.info("Creating new model with type: %s",
                        settings.model.model_type)
            model_func = {
                "aspp": semantic_segmentation,
                "attn": semantic_segmentation_attn
            }[settings.model.model_type]
            model = model_func(timesteps=settings.training.timesteps,
                               out_class=l_type.get_out_classes(),
                               ch_num=len(settings.training.channels))

        logger.info("Compiling model with loss function type: %s",
                    settings.training.loss_function)
        loss_func = {
            "smooth": lambda y, x: smooth_loss(y, x, total_chs=l_type.get_out_classes()),
            "focal": focal_loss,
            "bce": tf.keras.losses.BinaryCrossentropy()
        }[settings.training.loss_function]
        model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])

        logger.info("Resolving model output path")
        if model_name is None:
            model_name = str(datetime.now()).replace(" ", "_")
        if not model_name.startswith(settings.model.save_prefix):
            model_name = settings.model.save_prefix + "_" + model_name
        model_save_path = jpath(settings.model.save_path, model_name)
        ensure_path_exists(model_save_path)
        write_yaml(settings.to_json(),
                   jpath(model_save_path, "configurations.yaml"))
        write_yaml(model.to_yaml(),
                   jpath(model_save_path, "arch.yaml"),
                   dump=False)
        logger.info("Model output to: %s", model_save_path)

        logger.info("Constructing callbacks")
        callbacks = [
            EarlyStopping(patience=settings.training.early_stop),
            ModelCheckpoint(model_save_path, save_weights_only=True)
        ]
        logger.info("Callback list: %s", callbacks)

        logger.info("Start training")
        history = train_epochs(model,
                               train_dataset,
                               validate_dataset=val_dataset,
                               epochs=settings.training.epoch,
                               steps=settings.training.steps,
                               val_steps=settings.training.val_steps,
                               callbacks=callbacks)
        return model_save_path, history
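A hypothetical training call for the method above; the feature folder and model name are made up, and app stands for the transcription application instance that owns train():

model_path, history = app.train("./MusicNet/train_feature",
                                model_name="music_aspp_run1")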
Example #26
def train_model(train_loader,
                test_loader,
                device,
                lr,
                epochs,
                output_path,
                valid_loader=None):
    model = CNN().to(device)
    optimizer = SGD(model.parameters(), lr=lr)

    average_loss_train = []
    average_loss_test = []

    accuracy_train = []
    accuracy_test = []

    for epoch in range(epochs):
        model.train()
        correct_train, loss_train, _ = loop_dataset(model, train_loader,
                                                    device, optimizer)

        print(
            f'Epoch {epoch} : average train loss - {np.mean(loss_train)}, train accuracy - {correct_train}'
        )

        average_loss_train.append(np.mean(loss_train))
        accuracy_train.append(correct_train)

        model.eval()
        correct_test, loss_test, _ = loop_dataset(model, test_loader, device)

        print(
            f'Epoch {epoch} : average test loss - {np.mean(loss_test)}, test accuracy - {correct_test}'
        )

        average_loss_test.append(np.mean(loss_test))
        accuracy_test.append(correct_test)

    model.eval()

    for layer in model.layers:
        layer.register_forward_hook(forward_hook)
    if valid_loader is not None:
        correct_valid, _, output = loop_dataset(model, valid_loader, device)

        print('\033[99m' + f'Accuracy on VALID test: {correct_valid}' +
              '\033[0m')

    checkpoint = {
        'model': CNN(),
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(checkpoint, jpath(output_path, 'checkpoint.pth'))

    plt.figure()
    plt.plot(range(epochs), average_loss_train, lw=0.3, c='g')
    plt.plot(range(epochs), average_loss_test, lw=0.3, c='r')
    plt.legend(['train_loss', 'test_loss'])
    plt.xlabel('#Epoch')
    plt.ylabel('Loss')
    plt.savefig(jpath(output_path, 'loss.png'))

    plt.figure()
    plt.plot(range(epochs), accuracy_train, lw=0.3, c='g')
    plt.plot(range(epochs), accuracy_test, lw=0.3, c='r')
    plt.legend(['train_acc', 'test_acc'])
    plt.xlabel('#Epoch')
    plt.ylabel('Accuracy')
    plt.savefig(jpath(output_path, 'accuracy.png'))
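A hypothetical invocation of the trainer above, assuming the DataLoader instances and the CNN/loop_dataset helpers it references are defined elsewhere in the module:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_model(train_loader, test_loader, device,
            lr=0.01, epochs=20, output_path="./runs")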
Example #27
    def generate_feature(self,
                         dataset_path,
                         music_settings=None,
                         num_threads=4):
        """Extract the feature from the given dataset.

        To train the model, the first step is to pre-process the data into feature
        representations. After downloading the dataset, use this function to generate
        the features by passing in the path of the stored dataset.

        To specify the output path, modify the attribute
        ``music_settings.dataset.feature_save_path``.
        It defaults to the folder where the dataset is stored, generating
        two sub-folders: ``train_feature`` and ``test_feature``.

        Parameters
        ----------
        dataset_path: Path
            Path to the downloaded dataset.
        music_settings: MusicSettings
            The configuration instance that holds all relevant settings for
            the life-cycle of building a model.
        num_threads:
            Number of threads for extracting the features in parallel.

        See Also
        --------
        omnizart.constants.datasets:
            The supported datasets and the corresponding training/testing splits.
        """
        settings = self._validate_and_get_settings(music_settings)

        dataset_type = resolve_dataset_type(dataset_path,
                                            keywords={
                                                "maps": "maps",
                                                "musicnet": "musicnet",
                                                "maestro": "maestro",
                                                "rhythm": "pop",
                                                "pop": "pop"
                                            })
        if dataset_type is None:
            logger.warning(
                "The given path %s does not match any built-in processable dataset. Do nothing...",
                dataset_path)
            return
        logger.info("Inferred dataset type: %s", dataset_type)

        # Build instance mapping
        struct = {
            "maps": d_struct.MapsStructure,
            "musicnet": d_struct.MusicNetStructure,
            "maestro": d_struct.MaestroStructure,
            "pop": d_struct.PopStructure
        }[dataset_type]
        label_extractor = {
            "maps": MapsLabelExtraction,
            "musicnet": MusicNetLabelExtraction,
            "maestro": MaestroLabelExtraction,
            "pop": PopLabelExtraction
        }[dataset_type]

        # Fetching wav files
        train_wav_files = struct.get_train_wavs(dataset_path=dataset_path)
        test_wav_files = struct.get_test_wavs(dataset_path=dataset_path)
        logger.info("Number of total training wavs: %d", len(train_wav_files))
        logger.info("Number of total testing wavs: %d", len(test_wav_files))

        # Resolve feature output path
        train_feat_out_path, test_feat_out_path = self._resolve_feature_output_path(
            dataset_path, settings)
        logger.info("Output training feature to %s", train_feat_out_path)
        logger.info("Output testing feature to %s", test_feat_out_path)

        # Feature extraction
        logger.info(
            "Start extracting the features of the dataset %s. "
            "This may take a while and affect the computer's performance.",
            dataset_type.title())
        logger.info("Extracting training feature")
        _parallel_feature_extraction(train_wav_files,
                                     train_feat_out_path,
                                     settings.feature,
                                     num_threads=num_threads)
        logger.info("Extracting testing feature")
        _parallel_feature_extraction(test_wav_files,
                                     test_feat_out_path,
                                     settings.feature,
                                     num_threads=num_threads)
        logger.info("Extraction finished")

        # Fetching label files
        train_label_files = struct.get_train_labels(dataset_path=dataset_path)
        test_label_files = struct.get_test_labels(dataset_path=dataset_path)
        logger.info("Number of total training labels: %d",
                    len(train_label_files))
        logger.info("Number of total testing labels: %d",
                    len(test_label_files))
        assert len(train_label_files) == len(train_wav_files)
        assert len(test_label_files) == len(test_wav_files)

        # Extract labels
        logger.info("Start extracting the label of the dataset %s",
                    dataset_type.title())
        label_extractor.process(train_label_files,
                                out_path=train_feat_out_path,
                                t_unit=settings.feature.hop_size)
        label_extractor.process(test_label_files,
                                out_path=test_feat_out_path,
                                t_unit=settings.feature.hop_size)

        # Writing out the settings
        write_yaml(settings.to_json(),
                   jpath(train_feat_out_path, ".success.yaml"))
        write_yaml(settings.to_json(),
                   jpath(test_feat_out_path, ".success.yaml"))
        logger.info("All done")
Example #28
    workdir = abspath(options.workdir)
    outputdir = abspath(options.outputdir)
    #if not outputdir:
    #    outputdir = abspath(os.curdir)
    interesting_ports = options.ports
    exportports = options.exportports
    
    if interesting_ports:
        interesting_ports = [ int(p) for p in interesting_ports.split(',') ]
    
    pprint('workdir: %s' % workdir)
    for root, dirs, files in os.walk(workdir):
        for filename in files:
            if filename.endswith('.gnmap'):
                filename = abspath(jpath(root, filename))  # join with the walk root, not the top-level workdir
                pprint('process file: %s' % filename)
                process_file(filename)
    
    tcp_ports = sorted(open_tcp_ports)

    udp_ports = sorted(open_udp_ports)

    if csv_export:
        out_fh = open(abspath(jpath(outputdir, 'open_by_ports.csv')), 'w')
        out_fh.write('TCP{0}\n'.format(csv_delimiter))
        for port in tcp_ports:
            if interesting_ports and port not in interesting_ports:
                continue
Example #29
    def train(self,
              feature_folder,
              model_name=None,
              input_model_path=None,
              drum_settings=None):
        """Model training.

        Train a new model or continue to train on a previously trained model.

        Parameters
        ----------
        feature_folder: Path
            Path to the folder containing generated feature.
        model_name: str
            The name for storing the trained model. If not given, will default to the
            current timestamp.
        input_model_path: Path
            Continue to train on the pre-trained model by specifying the path.
        drum_settings: DrumSettings
            The configuration instance that holds all relevant settings for
            the life-cycle of building a model.
        """
        settings = self._validate_and_get_settings(drum_settings)

        if input_model_path is not None:
            logger.info("Continue to train on model: %s", input_model_path)
            model, prev_set = self._load_model(
                input_model_path, custom_objects=self.custom_objects)
            settings.model.save_path = prev_set.model.save_path
            settings.training.init_learning_rate = prev_set.training.init_learning_rate
            settings.training.res_block_num = prev_set.training.res_block_num

        logger.info("Constructing dataset instance")
        split = settings.training.steps / (settings.training.steps +
                                           settings.training.val_steps)
        train_feat_files, val_feat_files = get_train_val_feat_file_list(
            feature_folder, split=split)

        output_types = (tf.float32, tf.float32)
        output_shapes = ([120, 120, 4], [4, 13])
        train_dataset = PopDatasetLoader(
                feature_files=train_feat_files,
                num_samples=settings.training.epoch * settings.training.batch_size * settings.training.steps
            ) \
            .get_dataset(settings.training.batch_size, output_types=output_types, output_shapes=output_shapes)
        val_dataset = PopDatasetLoader(
                feature_files=val_feat_files,
                num_samples=settings.training.epoch * settings.training.val_batch_size * settings.training.val_steps
            ) \
            .get_dataset(settings.training.val_batch_size, output_types=output_types, output_shapes=output_shapes)

        if input_model_path is None:
            logger.info("Constructing new model")
            model = drum_model(
                out_classes=13,
                mini_beat_per_seg=settings.feature.mini_beat_per_segment,
                res_block_num=settings.training.res_block_num)

        optimizer = tf.keras.optimizers.Adam(
            learning_rate=settings.training.init_learning_rate)
        model.compile(optimizer=optimizer,
                      loss=loss_func,
                      metrics=["accuracy"])

        logger.info("Resolving model output path")
        if model_name is None:
            model_name = str(datetime.now()).replace(" ", "_")
        if not model_name.startswith(settings.model.save_prefix):
            model_name = settings.model.save_prefix + "_" + model_name
        model_save_path = jpath(settings.model.save_path, model_name)
        ensure_path_exists(model_save_path)
        write_yaml(settings.to_json(),
                   jpath(model_save_path, "configurations.yaml"))
        write_yaml(model.to_yaml(),
                   jpath(model_save_path, "arch.yaml"),
                   dump=False)
        logger.info("Model output to: %s", model_save_path)

        logger.info("Constructing callbacks")
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=settings.training.early_stop, monitor="val_loss"),
            tf.keras.callbacks.ModelCheckpoint(jpath(model_save_path,
                                                     "weights.h5"),
                                               save_weights_only=True)
        ]
        logger.info("Callback list: %s", callbacks)

        logger.info("Start training")
        history = model.fit(train_dataset,
                            validation_data=val_dataset,
                            epochs=settings.training.epoch,
                            steps_per_epoch=settings.training.steps,
                            validation_steps=settings.training.val_steps,
                            callbacks=callbacks,
                            use_multiprocessing=True,
                            workers=8)
        return model_save_path, history
Example #30
    def generate_feature(self,
                         dataset_path,
                         vocal_settings=None,
                         num_threads=4):
        """Extract the feature of the whole dataset.

        Currently supports MIR-1K and TONAS datasets. To train the model, you have to prepare the training
        data first, then process it into feature representations. After downloading the dataset,
        use this function to do the pre-processing and transform the raw data into features.

        To specify the output path, modify the attribute
        ``vocal_settings.dataset.feature_save_path`` to the value you want.
        It will default to the folder under where the dataset stored, generating
        two folders: ``train_feature`` and ``test_feature``.

        Parameters
        ----------
        dataset_path: Path
            Path to the downloaded dataset.
        vocal_settings: VocalSettings
            The configuration instance that holds all relative settings for
            the life-cycle of building a model.
        num_threads:
            Number of threads for parallel extracting the features.
        """
        settings = self._validate_and_get_settings(vocal_settings)

        dataset_type = resolve_dataset_type(dataset_path,
                                            keywords={
                                                "cmedia": "cmedia",
                                                "mir-1k": "mir1k",
                                                "mir1k": "mir1k",
                                                "tonas": "tonas"
                                            })
        if dataset_type is None:
            logger.warning(
                "The given path %s does not match any built-in processable dataset. Do nothing...",
                dataset_path)
            return
        logger.info("Inferred dataset type: %s", dataset_type)

        # Build instance mapping
        struct = {
            "cmedia": d_struct.CMediaStructure,
            "mir1k": d_struct.MIR1KStructure,
            "tonas": d_struct.TonasStructure
        }[dataset_type]
        label_extractor = {
            "cmedia": lextor.CMediaLabelExtraction,
            "mir1k": lextor.MIR1KlabelExtraction,
            "tonas": lextor.TonasLabelExtraction
        }[dataset_type]

        # Fetching wav files
        train_data = struct.get_train_data_pair(dataset_path=dataset_path)
        test_data = struct.get_test_data_pair(dataset_path=dataset_path)
        logger.info("Number of total training wavs: %d", len(train_data))
        logger.info("Number of total testing wavs: %d", len(test_data))

        # Resolve feature output path
        train_feat_out_path, test_feat_out_path = self._resolve_feature_output_path(
            dataset_path, settings)
        logger.info("Output training feature to %s", train_feat_out_path)
        logger.info("Output testing feature to %s", test_feat_out_path)

        # Feature extraction
        logger.info(
            "Start extracting training features of the %s dataset. "
            "This may take a while and affect the computer's performance.",
            dataset_type.title())
        # Do source separation to separate the vocal track first.
        wav_paths = _vocal_separation([data[0] for data in train_data],
                                      jpath(dataset_path,
                                            "train_wavs_spleeter"))
        train_data = _validate_order_and_get_new_pair(wav_paths, train_data)
        _parallel_feature_extraction(train_data,
                                     label_extractor,
                                     train_feat_out_path,
                                     settings.feature,
                                     num_threads=num_threads)

        # Feature extraction
        logger.info(
            "Start extracting testing features of the %s dataset. "
            "This may take a while and affect the computer's performance.",
            dataset_type.title())
        # Do source separation to separate the vocal track first.
        wav_paths = _vocal_separation([data[0] for data in test_data],
                                      jpath(dataset_path,
                                            "test_wavs_spleeter"))
        test_data = _validate_order_and_get_new_pair(wav_paths, test_data)
        _parallel_feature_extraction(test_data,
                                     label_extractor,
                                     test_feat_out_path,
                                     settings.feature,
                                     num_threads=num_threads)

        # Writing out the settings
        write_yaml(settings.to_json(),
                   jpath(train_feat_out_path, ".success.yaml"))
        write_yaml(settings.to_json(),
                   jpath(test_feat_out_path, ".success.yaml"))
        logger.info("All done")
Example #31
    def train(self,
              feature_folder,
              semi_feature_folder=None,
              model_name=None,
              input_model_path=None,
              vocal_settings=None):
        """Model training.

        Train a new model or continue to train on a previously trained model.

        Parameters
        ----------
        feature_folder: Path
            Path to the folder containing generated feature.
        semi_feature_folder: Path
            If specified, semi-supervise learning will be leveraged, and the feature
            files contained in this folder will be used as unsupervised data.
        model_name: str
            The name for storing the trained model. If not given, will default to the
            current timesamp.
        input_model_path: Path
            Continue to train on the pre-trained model by specifying the path.
        vocal_settings: VocalSettings
            The configuration instance that holds all relative settings for
            the life-cycle of building a model.
        """
        settings = self._validate_and_get_settings(vocal_settings)

        if input_model_path is not None:
            logger.info("Continue to train on model: %s", input_model_path)
            model, prev_set = self._load_model(input_model_path)
            settings.model.save_path = prev_set.model.save_path

        logger.info("Constructing dataset instance")
        split = settings.training.steps / (settings.training.steps +
                                           settings.training.val_steps)
        train_feat_files, val_feat_files = get_train_val_feat_file_list(
            feature_folder, split=split)

        output_types = (tf.float32, tf.float32)
        output_shapes = ((settings.training.context_length * 2 + 1, 174, 9),
                         (19, 6))  # noqa: E226
        train_dataset = VocalDatasetLoader(
                ctx_len=settings.training.context_length,
                feature_files=train_feat_files,
                num_samples=settings.training.epoch * settings.training.batch_size * settings.training.steps
            ) \
            .get_dataset(settings.training.batch_size, output_types=output_types, output_shapes=output_shapes)
        val_dataset = VocalDatasetLoader(
                ctx_len=settings.training.context_length,
                feature_files=val_feat_files,
                num_samples=settings.training.epoch * settings.training.val_batch_size * settings.training.val_steps
            ) \
            .get_dataset(settings.training.val_batch_size, output_types=output_types, output_shapes=output_shapes)
        if semi_feature_folder is not None:
            # Semi-supervise learning dataset.
            feat_files = glob.glob(f"{semi_feature_folder}/*.hdf")
            semi_dataset = VocalDatasetLoader(
                    ctx_len=settings.training.context_length,
                    feature_files=feat_files,
                    num_samples=settings.training.epoch * settings.training.batch_size * settings.training.steps
                ) \
                .get_dataset(settings.training.batch_size, output_types=output_types, output_shapes=output_shapes)
            train_dataset = tf.data.Dataset.zip((train_dataset, semi_dataset))

        if input_model_path is None:
            logger.info("Constructing new model")
            model = self.get_model(settings)

        # Notice: the original implementation uses AdamW as the optimizer, which is also
        # available through tensorflow_addons.optimizers.AdamW. However, we found that with
        # AdamW the model fails to converge and the training loss keeps increasing.
        optimizer = tf.keras.optimizers.Adam(
            learning_rate=settings.training.init_learning_rate)
        model.compile(optimizer=optimizer,
                      loss='bce',
                      metrics=['accuracy', 'binary_accuracy'])

        logger.info("Resolving model output path")
        if model_name is None:
            model_name = str(datetime.now()).replace(" ", "_")
        if not model_name.startswith(settings.model.save_prefix):
            model_name = settings.model.save_prefix + "_" + model_name
        model_save_path = jpath(settings.model.save_path, model_name)
        ensure_path_exists(model_save_path)
        write_yaml(settings.to_json(),
                   jpath(model_save_path, "configurations.yaml"))
        logger.info("Model output to: %s", model_save_path)

        logger.info("Constructing callbacks")
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=settings.training.early_stop, monitor="val_loss"),
            tf.keras.callbacks.ModelCheckpoint(jpath(model_save_path,
                                                     "weights"),
                                               save_weights_only=True,
                                               monitor="val_loss")
        ]
        logger.info("Callback list: %s", callbacks)

        logger.info("Start training")
        history = model.fit(train_dataset,
                            validation_data=val_dataset,
                            epochs=settings.training.epoch,
                            steps_per_epoch=settings.training.steps,
                            validation_steps=settings.training.val_steps,
                            callbacks=callbacks,
                            use_multiprocessing=True,
                            workers=8)
        return model_save_path, history
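
A hypothetical invocation, assuming `app` is the same application instance and that features were generated beforehand; `semi_feature_folder` is optional and only needed when unlabeled features are available:

# Folder names below are placeholders for illustration.
model_path, history = app.train(
    "./MIR-1K/train_feature",
    semi_feature_folder="./unlabeled_feature",  # optional, enables semi-supervised training
    model_name="vocal_demo")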
Example #32
    def generate_feature(self,
                         dataset_path,
                         drum_settings=None,
                         num_threads=3):
        """Extract the feature of the whole dataset.

        Currently only supports Pop dataset. To train the model, you have to prepare the training
        data first, then process it into feature representations. After downloading the dataset,
        use this function to do the pre-processing and transform the raw data into features.

        To specify the output path, modify the attribute
        ``music_settings.dataset.feature_save_path`` to the value you want.
        It will default to the folder under where the dataset stored, generating
        two folders: ``train_feature`` and ``test_feature``.

        Parameters
        ----------
        dataset_path: Path
            Path to the downloaded dataset.
        drum_settings: DrumSettings
            The configuration instance that holds all relative settings for
            the life-cycle of building a model.
        num_threads:
            Number of threads for parallel extracting the features.

        See Also
        --------
        omnizart.constants.datasets.PopStructure:
            The only supported dataset for drum transcription. Records the train/test
            partition according to the folder.
        """
        settings = self._validate_and_get_settings(drum_settings)

        # Resolve feature output path
        train_feat_out_path, test_feat_out_path = self._resolve_feature_output_path(
            dataset_path, settings)
        logger.info("Output training feature to %s", train_feat_out_path)
        logger.info("Output testing feature to %s", test_feat_out_path)

        struct = PopStructure
        train_data_pair = struct.get_train_data_pair(dataset_path=dataset_path)
        logger.info(
            "Start extracting training features of the dataset. "
            "This may take a while and affect the computer's performance."
        )
        _parallel_feature_extraction_v2(train_data_pair,
                                        train_feat_out_path,
                                        settings.feature,
                                        num_threads=num_threads)

        test_data_pair = struct.get_test_data_pair(dataset_path=dataset_path)
        logger.info(
            "Start extracting testing features of the dataset. "
            "This may take a while and affect the computer's performance."
        )
        _parallel_feature_extraction_v2(test_data_pair,
                                        test_feat_out_path,
                                        settings.feature,
                                        num_threads=num_threads)

        # Writing out the settings
        write_yaml(settings.to_json(),
                   jpath(train_feat_out_path, ".success.yaml"))
        write_yaml(settings.to_json(),
                   jpath(test_feat_out_path, ".success.yaml"))
        logger.info("All done")
Example #33
def load():
    pygame.font.init()

    logger.info("loading")
    from zort import config

    global resource_path
    global sounds, images, music, fonts, maps, tiles
    global border, border_path

    # Initialize the resource caches so the assignments below succeed.
    tiles = dict()
    sounds = dict()
    images = dict()
    music = dict()
    fonts = dict()
    maps = OrderedDict()

    resource_path = config.get('paths', 'resource-path')
    resource_path = abspath(resource_path)
    border_path = jpath(resource_path, 'dialog.png')
    sounds_path = jpath(resource_path, 'sounds', '*')

    # load the tiles
    tile_path = jpath(resource_path, 'tiles', '*.png')
    for filename in glob.glob(tile_path):
        # glob already returns the full path; no need to join again
        image = pygame.image.load(filename).convert_alpha()
        tiles[basename(filename)] = image
        yield filename, image

    for name, filename in config.items('font-files'):
        path = jpath(resource_path, 'fonts', filename)
        fonts[name] = path
        yield path, path

    vol = config.getint('sound', 'sound-volume') / 100.
    for filename in glob.glob(sounds_path):
        logger.info("loading %s", filename)
        try:
            if isfile(filename):
                sound = pygame.mixer.Sound(filename)
                sound.set_volume(vol)
                sounds[basename(filename)] = sound
                yield filename, sound
        except pygame.error:
            pass

    for name, filename in config.items('image-files'):
        path = jpath(resource_path, 'images', filename)
        logger.info("loading %s", path)
        image = pygame.image.load(path)
        images[name] = image
        yield path, image

    for name, filename in config.items('map-files'):
        path = jpath(resource_path, 'maps', filename)
        logger.info("loading %s", path)
        maps[name] = path
        yield path, path

    for name, filename in config.items('music-files'):
        path = jpath(resource_path, 'music', filename)
        logger.info("loading %s", path)
        music[name] = path
        yield path, path
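
Because `load()` is a generator that yields one `(path, resource)` pair per loaded asset, the caller can drive a loading screen by iterating it; a minimal sketch:

# Iterate the loader so a progress indicator can update per resource.
for path, resource in load():
    print("loaded:", path)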
Example #34
    def train(self, feature_folder, model_name=None, input_model_path=None, beat_settings=None):
        """Model training.

        Train the model from scratch or continue training given a model checkpoint.

        Parameters
        ----------
        feature_folder: Path
            Path to the generated feature.
        model_name: str
            The name of the trained model. If not given, will default to the
            current timestamp.
        input_model_path: Path
            Specify the path to the model checkpoint in order to fine-tune
            the model.
        beat_settings: BeatSettings
            The configuration that holds all relevant settings for
            the life-cycle of model building.
        """
        settings = self._validate_and_get_settings(beat_settings)

        if input_model_path is not None:
            logger.info("Continue to train on model: %s", input_model_path)
            model, prev_set = self._load_model(input_model_path)
            settings.model.from_json(prev_set.model.to_json())
            settings.feature.time_unit = prev_set.feature.time_unit

        logger.info("Constructing dataset instance")
        split = settings.training.steps / (settings.training.steps + settings.training.val_steps)
        train_feat_files, val_feat_files = get_train_val_feat_file_list(feature_folder, split=split)

        output_types = (tf.float32, tf.float32)
        output_shapes = ((settings.model.timesteps, 178), (settings.model.timesteps, 2))
        train_dataset = BeatDatasetLoader(
                feature_files=train_feat_files,
                num_samples=settings.training.epoch * settings.training.batch_size * settings.training.steps,
                slice_hop=settings.model.timesteps // 2
            ) \
            .get_dataset(settings.training.batch_size, output_types=output_types, output_shapes=output_shapes)
        val_dataset = BeatDatasetLoader(
                feature_files=val_feat_files,
                num_samples=settings.training.epoch * settings.training.val_batch_size * settings.training.val_steps,
                slice_hop=settings.model.timesteps // 2
            ) \
            .get_dataset(settings.training.val_batch_size, output_types=output_types, output_shapes=output_shapes)

        if input_model_path is None:
            logger.info("Constructing new %s model for training.", settings.model.model_type)
            model_func = {
                "blstm": self._construct_blstm_model,
                "blstm_attn": self._construct_blstm_attn_model
            }[settings.model.model_type]
            model = model_func(settings)

        logger.info("Compiling model")
        optimizer = tf.keras.optimizers.Adam(learning_rate=settings.training.init_learning_rate)
        loss = lambda y, x: weighted_binary_crossentropy(y, x, down_beat_weight=settings.training.down_beat_weight)
        model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

        logger.info("Resolving model output path")
        if model_name is None:
            model_name = str(datetime.now()).replace(" ", "_")
        if not model_name.startswith(settings.model.save_prefix):
            model_name = settings.model.save_prefix + "_" + model_name
        model_save_path = jpath(settings.model.save_path, model_name)
        ensure_path_exists(model_save_path)
        write_yaml(settings.to_json(), jpath(model_save_path, "configurations.yaml"))
        write_yaml(model.to_yaml(), jpath(model_save_path, "arch.yaml"), dump=False)
        logger.info("Model output to: %s", model_save_path)

        logger.info("Constructing callbacks")
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                patience=settings.training.early_stop, monitor="val_loss", restore_best_weights=False
            ),
            tf.keras.callbacks.ModelCheckpoint(
                jpath(model_save_path, "weights.h5"), save_weights_only=True, monitor="val_loss"
            )
        ]
        logger.info("Callback list: %s", callbacks)

        logger.info("Start training")
        history = model.fit(
            train_dataset,
            validation_data=val_dataset,
            epochs=settings.training.epoch,
            steps_per_epoch=settings.training.steps,
            validation_steps=settings.training.val_steps,
            callbacks=callbacks,
            use_multiprocessing=True,
            workers=8
        )
        return model_save_path, history
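
A hypothetical fine-tuning run via `input_model_path` (all paths are placeholders for illustration, and `app` is assumed to be the beat transcription application instance):

model_path, history = app.train(
    "./beat_feature",
    input_model_path="./checkpoints/beat_blstm",
    model_name="beat_finetune")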
Example #35
import argparse

import numpy as np
from os.path import join as jpath
from PIL import Image


def params():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot',
                        type=str,
                        default='datasets/KingsCollege',
                        help='dataset root')
    parser.add_argument('--height', type=int, default=256, help='image height')
    parser.add_argument('--width', type=int, default=455, help='image width')
    parser.add_argument('--save_resized_imgs',
                        action="store_true",
                        default=False,
                        help='save resized train/test images [height, width]')
    return parser.parse_args()


args = params()
dataroot = args.dataroot
imsize = [args.height, args.width]  # (H, W)
imlist = np.loadtxt(jpath(dataroot, 'dataset_train.txt'),
                    dtype=str,
                    delimiter=' ',
                    skiprows=3,
                    usecols=0)
# np.float was removed from NumPy; np.float64 preserves the old semantics.
mean_image = np.zeros((imsize[0], imsize[1], 3), dtype=np.float64)
for i, impath in enumerate(imlist):
    print('[%d/%d]:%s' % (i + 1, len(imlist), impath), end='\r')
    image = Image.open(jpath(dataroot, impath)).convert('RGB')
    image = image.resize((imsize[1], imsize[0]), Image.BICUBIC)
    mean_image += np.array(image).astype(np.float64)

    # save resized training images
    if args.save_resized_imgs:
        image.save(jpath(dataroot, impath))
print()
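
The loop above only accumulates the per-pixel sum; a typical continuation, not shown in the original, would normalize it into the mean image and persist it:

# Hypothetical continuation: average the accumulated sum and save it.
mean_image /= len(imlist)
np.save(jpath(dataroot, 'mean_image.npy'), mean_image)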
Example #36
    @classmethod
    def get_test_labels(cls, dataset_path):
        _, test_ids = cls._get_train_test_split_ids(dataset_path)
        label_path = jpath(dataset_path, cls.label_folder)
        return cls._get_paths_in_ids(label_path, cls.label_file_name, test_ids)
Example #37
    @classmethod
    def get_test_wavs(cls, dataset_path):
        _, test_ids = cls._get_train_test_split_ids(dataset_path)
        feat_path = jpath(dataset_path, cls.feature_folder)
        return cls._get_paths_in_ids(feat_path, cls.feature_file_name, test_ids)
Example #38
    @classmethod
    def _get_paths_in_ids(cls, target_folder, target_file, ids):
        output = []
        for f_name in os.listdir(target_folder):
            if f_name in ids:
                output.append(jpath(target_folder, f_name, target_file))
        return output
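
A hypothetical illustration of how the three classmethods above compose; `SomeStructure`, its folder attributes, and `_get_train_test_split_ids` are assumptions about the enclosing dataset-structure class:

test_label_paths = SomeStructure.get_test_labels("/data/my_dataset")
test_wav_paths = SomeStructure.get_test_wavs("/data/my_dataset")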