def __init__(self, conf_path, model_name):
    """Build the STFT/ISTFT pair and the network selected by ``model_name``.

    Args:
        conf_path: Configuration path handed to ``read_config`` and to the
            selected network's constructor.
        model_name: Name of the network to build. Accepted values are
            'AMT' / 'AMTBaseline', 'MSS' / 'MSSBaseline',
            'MSS-AMT' / 'MultiTaskBaseline', and
            'MSI' / 'MSI-DIS' / 'DisentanglementModel'.

    Raises:
        ValueError: If ``model_name`` is not one of the accepted values.
    """
    super(ModelFactory, self).__init__()

    # Spectral front-end settings come from the "FeatureExaction" section.
    hparams = read_config(conf_path, "FeatureExaction")
    window_size = int(hparams['window_size'])
    hop_size = int(hparams['hop_size'])
    pad_mode = hparams['pad_mode']
    window = hparams['window']

    # The forward and inverse transforms are configured identically.
    self.stft = STFT(
        n_fft=window_size,
        hop_length=hop_size,
        win_length=window_size,
        window=window,
        center=True,
        pad_mode=pad_mode,
        freeze_parameters=True,
    )
    self.istft = ISTFT(
        n_fft=window_size,
        hop_length=hop_size,
        win_length=window_size,
        window=window,
        center=True,
        pad_mode=pad_mode,
        freeze_parameters=True,
    )

    if model_name in ('AMT', 'AMTBaseline'):
        network = AMTBaseline(conf_path)
    elif model_name in ('MSS', 'MSSBaseline'):
        network = MSSBaseline(conf_path)
    elif model_name in ('MSS-AMT', 'MultiTaskBaseline'):
        network = MultiTaskBaseline(conf_path)
    elif model_name in ('MSI', 'MSI-DIS', 'DisentanglementModel'):
        network = DisentanglementModel(conf_path)
    else:
        # Without this branch an unknown name left ``network`` unbound and
        # surfaced as an UnboundLocalError on the assignment below.
        raise ValueError(
            "Unknown model_name {!r}; expected one of: AMT, AMTBaseline, "
            "MSS, MSSBaseline, MSS-AMT, MultiTaskBaseline, MSI, MSI-DIS, "
            "DisentanglementModel".format(model_name)
        )

    self.network = network
def __init_urmp_params__(self, config_path, config_name, name):
    """Copy URMP settings from a config section into ``self._params``.

    Args:
        config_path: Configuration path handed to ``read_config``.
        config_name: Section name handed to ``read_config``; also stored
            under ``params['config_name']``.
        name: Accepted for interface compatibility; not used in this method.

    Side effects:
        Updates the existing ``self._params`` mapping in place and sets
        ``self.tag`` to ``-1``.
    """
    section = read_config(config_path, config_name)
    target = self._params

    # (key, converter) pairs, applied one by one in this order.
    numeric_fields = (
        ('max_note_shift', int),
        ('frames_per_second', float),
        ('begin_note', int),
        ('classes_num', int),
    )
    for key, convert in numeric_fields:
        target[key] = convert(section[key])

    target['config_name'] = config_name
    self.tag = -1
def __init__(self, conf_path, model_name, latent_rep_dim):
    """Assemble a stack of ``DeepConvBlock`` layers and a linear head.

    Args:
        conf_path: Configuration path handed to ``read_config``.
        model_name: Section name handed to ``read_config``; the section
            must provide ``blocks_num`` and ``output_dim``.
        latent_rep_dim: Indexable whose element 0 is used as the initial
            channel count and element 1 as the initial size.
    """
    super(Transcriptor, self).__init__()

    config = read_config(conf_path, model_name)
    depth = int(config['blocks_num'])
    head_dim = int(config['output_dim'])

    channels = latent_rep_dim[0]
    size = latent_rep_dim[1]

    self.layers = nn.ModuleList()
    for _ in range(depth):
        # Each block doubles the channel count; the tracked size is halved.
        widened = channels * 2
        self.layers.append(DeepConvBlock(channels, widened))
        channels = widened
        size //= 2

    self.bottom = LinearBlock1D(channels * size, head_dim)
    self.notes_num = head_dim
def __init_params__(self, config_path, mode):
    """Create one ``UrmpDataset`` per instrument and collect shared settings.

    Args:
        config_path: Configuration path handed to ``read_config`` and to
            each ``UrmpDataset``.
        mode: Mode string; used to pick the ``'{mode}_batch_size'`` entry
            and compared against ``'train'`` for the last dataset argument.

    Side effects:
        Sets ``self._datasets``, ``self._datasets_index``,
        ``self.datasets_samples_num`` and ``self._params``.
    """
    section = read_config(config_path, 'hdf5s_data')
    instruments = section['instruments'].split(',')

    per_instrument = {}
    for instrument in instruments:
        dataset = UrmpDataset(instrument, config_path, instrument, mode, mode == 'train')
        per_instrument[instrument] = dataset
        # These are overwritten on every pass, so the values kept are those
        # reported by the last instrument's dataset.
        notes_num = dataset.get_classes_num()
        duration = dataset.get_duration()
        frames_per_second = dataset.get_frames_per_second()
        sample_rate = dataset.get_sample_rate()
    self._datasets = per_instrument

    # Running totals of sample counts, starting at 0, in dataset-key order.
    index = []
    cumulative = [0]
    for key in per_instrument:
        index.append(key)
        count = per_instrument[key].get_samples_num()
        cumulative.append(count + cumulative[-1])
    self._datasets_index = index
    self.datasets_samples_num = cumulative

    self._params = {
        'batch_size': int(section[f'{mode}_batch_size']),
        'mode': mode,
        'notes_num': notes_num,
        'classes_num': len(index),
        'duration': duration,
        'frames_per_second': frames_per_second,
        'sources_num': len(instruments),
        'instruments': instruments,
        'sample_rate': sample_rate,
        'len': int(section['samples_num']),
    }
def __init__(self, conf_path, model_name):
    """Assemble the ``ConvBlock`` stack and linear head of the query network.

    Args:
        conf_path: Configuration path handed to ``read_config``.
        model_name: Section name handed to ``read_config``; the section
            must provide ``input_channels_num``, ``input_size``, ``pnum``
            and ``blocks_num``.
    """
    super(QueryNet, self).__init__()

    config = read_config(conf_path, model_name)
    fan_in = int(config['input_channels_num'])
    size = int(config['input_size'])
    pnum = int(config['pnum'])
    depth = int(config['blocks_num'])

    stack = nn.ModuleList()
    # The first block outputs 2 channels; every later block doubles that.
    fan_out = 2
    for _ in range(depth):
        stack.append(ConvBlock(in_channels=fan_in, out_channels=fan_out))
        fan_in, fan_out = fan_out, fan_out * 2
        size //= 2

    self.bn0 = Bn0()
    self.layers = stack
    self.bottom = LinearBlock1D(fan_in * size, pnum)
    self.blocks_num = depth
def __init__(self, conf_path, model_name):
    """Assemble the encoder's conv blocks and matching FiLM layers.

    Args:
        conf_path: Configuration path handed to ``read_config``.
        model_name: Section name handed to ``read_config``; the section
            must provide ``blocks_num``, ``input_channels_num``,
            ``input_size``, ``with_bn0`` and ``condition_dim``.

    ``blocks_num + 1`` conv/FiLM pairs are created. For each pair the
    ``[out_channels, size]`` it was built with is recorded in
    ``self.latent_rep_channels``.
    """
    super(Encoder, self).__init__()

    config = read_config(conf_path, model_name)
    depth = int(config["blocks_num"])
    fan_in = int(config["input_channels_num"])
    size = int(config["input_size"])
    # The value is not used below; the lookup is kept so the section is
    # still required to contain this key.
    _ = config["with_bn0"]
    cond_dim = int(config["condition_dim"])

    self.bn0 = Bn0()

    conv_stack = nn.ModuleList()
    film_stack = nn.ModuleList()
    shapes = []

    fan_out = 2
    for _ in range(depth + 1):
        conv_stack.append(DeepConvBlock(in_channels=fan_in, out_channels=fan_out))
        film_stack.append(FiLM1DLayer(cond_dim, fan_out, size))
        shapes.append([fan_out, size])
        fan_in, fan_out = fan_out, fan_out * 2
        size //= 2

    self.blocks_num = depth
    # Both values are derived from the post-loop counters, halved once more.
    self.output_size = size // 2
    self.output_dim = fan_out // 2
    self.layers = conv_stack
    self.film_layers = film_stack
    self.latent_rep_channels = shapes
def __init__(self, config_path, config_name, mode, shuffle):
    """Load the config section and run the three initialisation steps.

    Args:
        config_path: Configuration path handed to ``read_config``.
        config_name: Section name handed to ``read_config``.
        mode: Forwarded to ``__init_params__``.
        shuffle: Forwarded to ``__init_params__``.
    """
    section = read_config(config_path, config_name)

    # Steps run in this fixed order: parameters, then data, then track ids.
    self._params = self.__init_params__(section, mode, shuffle)
    self._data = self.__init_data__()
    self._tracks_id = self.__init_tracks_id__()