Exemple #1
0
    def __init__(self, config, useTPU=False):
        """Derive the model geometry from ``config`` and build the model.

        Args:
            config: Nested configuration mapping. This constructor reads the
                ``model`` keys ``num_stacks``, ``dilations``,
                ``filters.lengths.res``, ``target_field_length``,
                ``target_padding`` and ``num_sources``.
            useTPU: Flag stored on the instance as ``self.useTPU``; it is not
                used for anything else inside this constructor.
        """
        self.config = config
        self.useTPU = useTPU

        model_cfg = self.config['model']
        self.num_stacks = model_cfg['num_stacks']

        # A list is taken verbatim; an int N expands to 2**0 .. 2**N.
        # NOTE(review): any other type leaves self.dilations unassigned here.
        dilation_spec = model_cfg['dilations']
        if type(dilation_spec) is list:
            self.dilations = dilation_spec
        elif type(dilation_spec) is int:
            self.dilations = [2**power for power in range(dilation_spec + 1)]

        res_filter_length = model_cfg['filters']['lengths']['res']
        self.receptive_field_length = util.compute_receptive_field_length(
            self.num_stacks, self.dilations, res_filter_length, 1)

        # The input must cover one receptive field plus every additional
        # target sample beyond the first.
        self.target_field_length = model_cfg['target_field_length']
        extra_target_samples = self.target_field_length - 1
        self.input_length = self.receptive_field_length + extra_target_samples

        # Padding is added on both sides of the target field.
        self.target_padding = model_cfg['target_padding']
        padding_on_both_sides = 2 * self.target_padding
        self.padded_target_field_length = (
            self.target_field_length + padding_on_both_sides)

        # Half lengths are truncated to int.
        self.half_target_field_length = int(self.target_field_length / 2)
        self.half_receptive_field_length = int(
            self.receptive_field_length / 2)

        self.num_residual_blocks = self.num_stacks * len(self.dilations)

        # Index helpers defined elsewhere on the class; called only after the
        # length attributes above have been assigned.
        self.samples_of_interest_indices = (
            self.get_padded_target_field_indices())
        self.target_sample_indices = self.get_target_field_indices()

        self.num_sources = model_cfg['num_sources']
        self.num_channels = 2

        self.epoch_num = 0

        self.model = self.setup_model()
Exemple #2
0
    def __init__(self, config, load_checkpoint=None, input_length=None, target_field_length=None, print_model_summary=False):
        """Derive the network geometry from ``config`` and build the model.

        Args:
            config: Nested configuration mapping. Reads ``training.verbosity``,
                ``dataset.num_condition_classes`` and several ``model`` keys;
                the derived sizes are written back into ``config['model']``.
            load_checkpoint: Forwarded unchanged to ``self.setup_model``.
            input_length: Optional explicit input length. Used only when
                ``target_field_length`` is not given; the target field length
                is then derived from it.
            target_field_length: Optional explicit target field length. Takes
                precedence over ``input_length`` and over the configured value.
            print_model_summary: Forwarded unchanged to ``self.setup_model``.
        """
        self.config = config
        self.verbosity = config['training']['verbosity']

        self.num_stacks = self.config['model']['num_stacks']
        # An int N expands to the dilations 2**0 .. 2**N; a list is used as is.
        # NOTE(review): any other type leaves self.dilations unassigned here.
        if type(self.config['model']['dilations']) is int:
            self.dilations = [2 ** i for i in range(0, self.config['model']['dilations'] + 1)]
        elif type(self.config['model']['dilations']) is list:
            self.dilations = self.config['model']['dilations']

        self.num_condition_classes = config['dataset']['num_condition_classes']

        self.condition_input_length = self.get_condition_input_length(self.config['model']['condition_encoding'])
        self.receptive_field_length = int(util.compute_receptive_field_length(config['model']['num_stacks'], self.dilations,
                                                                              config['model']['filters']['lengths']['res'],
                                                                              1))

        # Resolve the two coupled lengths from exactly one source. Previously
        # the config fallback was the `else` of the target_field_length check
        # only, so an explicit input_length was silently overwritten whenever
        # target_field_length was omitted. target_field_length is tested first
        # so that it still wins when both arguments are supplied.
        if target_field_length is not None:
            self.target_field_length = target_field_length
            self.input_length = self.receptive_field_length + (self.target_field_length - 1)
        elif input_length is not None:
            self.input_length = input_length
            self.target_field_length = self.input_length - (self.receptive_field_length - 1)
        else:
            self.target_field_length = config['model']['target_field_length']
            self.input_length = self.receptive_field_length + (self.target_field_length - 1)

        self.target_padding = config['model']['target_padding']
        # Padding is added on both sides of the target field.
        self.padded_target_field_length = self.target_field_length + 2 * self.target_padding
        # Floor division keeps the half lengths integral on Python 3 as well
        # (plain `/` would produce floats there).
        self.half_target_field_length = self.target_field_length // 2
        self.half_receptive_field_length = self.receptive_field_length // 2
        self.num_residual_blocks = len(self.dilations) * self.num_stacks
        self.activation = keras.layers.Activation('relu')
        self.samples_of_interest_indices = self.get_padded_target_field_indices()
        self.target_sample_indices = self.get_target_field_indices()

        self.optimizer = self.get_optimizer()
        self.out_1_loss = self.get_out_1_loss()
        self.out_2_loss = self.get_out_2_loss()
        self.metrics = self.get_metrics()
        self.epoch_num = 0
        self.checkpoints_path = ''
        self.samples_path = ''
        self.history_filename = ''

        # Publish the derived sizes back into the shared config mapping.
        self.config['model']['num_residual_blocks'] = self.num_residual_blocks
        self.config['model']['receptive_field_length'] = self.receptive_field_length
        self.config['model']['input_length'] = self.input_length
        self.config['model']['target_field_length'] = self.target_field_length

        self.model = self.setup_model(load_checkpoint, print_model_summary)
Exemple #3
0
    def __init__(self,
                 config,
                 load_checkpoint=None,
                 input_length=None,
                 target_field_length=None,
                 print_model_summary=False):
        """Derive the network geometry from ``config`` and build the model.

        Args:
            config: Nested configuration mapping. Reads ``training.verbosity``,
                ``dataset.num_condition_classes`` and several ``model`` keys;
                the derived sizes are written back into ``config['model']``.
            load_checkpoint: Forwarded unchanged to ``self.setup_model``.
            input_length: Optional explicit input length. Used only when
                ``target_field_length`` is not given; the target field length
                is then derived from it.
            target_field_length: Optional explicit target field length. Takes
                precedence over ``input_length`` and over the configured value.
            print_model_summary: Forwarded unchanged to ``self.setup_model``.
        """
        self.config = config
        self.verbosity = config['training']['verbosity']

        self.num_stacks = self.config['model']['num_stacks']
        # An int N expands to the dilations 2**0 .. 2**N; a list is used as is.
        # NOTE(review): any other type leaves self.dilations unassigned here.
        if type(self.config['model']['dilations']) is int:
            self.dilations = [
                2**i for i in range(0, self.config['model']['dilations'] + 1)
            ]
        elif type(self.config['model']['dilations']) is list:
            self.dilations = self.config['model']['dilations']

        self.num_condition_classes = config['dataset']['num_condition_classes']

        # Original author's note (unverified): this is the ceiling of
        # np.log2(29), where 29 is the number of condition classes.
        self.condition_input_length = self.get_condition_input_length(
            self.config['model']['condition_encoding'])
        # Original author's note (unverified): roughly the span of input
        # covered by the dilated stacks.
        self.receptive_field_length = util.compute_receptive_field_length(
            config['model']['num_stacks'], self.dilations,
            config['model']['filters']['lengths']['res'], 1)

        # Resolve the two coupled lengths from exactly one source. Previously
        # the config fallback was the `else` of the target_field_length check
        # only, so an explicit input_length was silently overwritten whenever
        # target_field_length was omitted. target_field_length is tested first
        # so that it still wins when both arguments are supplied.
        if target_field_length is not None:
            self.target_field_length = target_field_length
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)
        elif input_length is not None:
            self.input_length = input_length
            self.target_field_length = self.input_length - (
                self.receptive_field_length - 1)
        else:
            # Original author's note (translated): the configured target field
            # length of 1601 looks like magic; it is probably chosen for speed
            # and more or less arbitrary.
            self.target_field_length = config['model']['target_field_length']
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)

        self.target_padding = config['model']['target_padding']
        # Padding is added on both sides of the target field.
        self.padded_target_field_length = self.target_field_length + 2 * self.target_padding
        self.half_target_field_length = self.target_field_length // 2
        self.half_receptive_field_length = self.receptive_field_length // 2
        self.num_residual_blocks = len(self.dilations) * self.num_stacks
        self.activation = keras.layers.Activation('relu')
        self.samples_of_interest_indices = self.get_padded_target_field_indices(
        )
        self.target_sample_indices = self.get_target_field_indices()

        self.optimizer = self.get_optimizer()
        self.out_1_loss = self.get_out_1_loss()
        self.out_2_loss = self.get_out_2_loss()
        self.metrics = self.get_metrics()
        self.epoch_num = 0
        self.checkpoints_path = ''
        self.samples_path = ''
        self.history_filename = ''

        # Publish the derived sizes back into the shared config mapping.
        self.config['model']['num_residual_blocks'] = self.num_residual_blocks
        self.config['model'][
            'receptive_field_length'] = self.receptive_field_length
        self.config['model']['input_length'] = self.input_length
        self.config['model']['target_field_length'] = self.target_field_length

        self.model = self.setup_model(load_checkpoint, print_model_summary)
Exemple #4
0
    def __init__(self,
                 config,
                 load_checkpoint=None,
                 input_length=None,
                 target_field_length=None,
                 print_model_summary=False):
        """Derive the network geometry from ``config`` and build the model.

        Args:
            config: Nested configuration mapping. Reads the ``training``,
                ``dataset``, ``optimizer`` and ``model`` sections; the derived
                sizes are written back into ``config['model']``.
            load_checkpoint: Forwarded unchanged to ``self.setup_model``.
            input_length: Optional explicit input length. Used only when
                ``target_field_length`` is not given; the target field length
                is then derived from it.
            target_field_length: Optional explicit target field length. Takes
                precedence over ``input_length`` and over the configured value.
            print_model_summary: Forwarded unchanged to ``self.setup_model``.
        """
        self.config = config
        training_cfg = config['training']
        self.verbosity = training_cfg['verbosity']

        self.batch_size = training_cfg['batch_size']
        # Optional training keys fall back to the defaults given here.
        self.n_speaker = training_cfg.get('n_speaker', 2)
        self.n_output = training_cfg.get('n_output', 2)
        self.mute_other_channel = training_cfg.get('mute_other_channel', False)

        self.num_stacks = self.config['model']['num_stacks']

        # An int N expands to the dilations 2**0 .. 2**N; a list is used as is.
        # NOTE(review): any other type leaves self.dilations unassigned here.
        if type(self.config['model']['dilations']) is int:
            self.dilations = [
                2**i for i in range(0, self.config['model']['dilations'] + 1)
            ]
        elif type(self.config['model']['dilations']) is list:
            self.dilations = self.config['model']['dilations']

        self.use_condition = training_cfg['use_condition']
        self.num_condition_classes = config['dataset']['num_condition_classes']
        self.condition_input_length = self.get_condition_input_length(
            self.config['model']['condition_encoding'])

        # Number of input samples needed for a single target sample
        # (original author's note: must be odd).
        self.receptive_field_length = util.compute_receptive_field_length(
            config['model']['num_stacks'], self.dilations,
            config['model']['filters']['lengths']['res'], 1)
        print('Receptive Field Length:', self.receptive_field_length)

        # Resolve the two coupled lengths from exactly one source. Previously
        # the config fallback was the `else` of the target_field_length check
        # only, so an explicit input_length was silently overwritten whenever
        # target_field_length was omitted. target_field_length is tested first
        # so that it still wins when both arguments are supplied.
        if target_field_length is not None:
            self.target_field_length = target_field_length
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)
        elif input_length is not None:
            self.input_length = input_length
            # The receptive field length is cast to int further down, which
            # suggests the helper may return a non-int; cast here too so the
            # derived target length stays usable as a sample count.
            self.target_field_length = int(self.input_length - (
                self.receptive_field_length - 1))
        else:
            self.target_field_length = config['model']['target_field_length']
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)

        self.input_length = int(self.input_length)
        print('Input Length:', self.input_length)
        self.target_padding = config['model']['target_padding']
        # Padding is added on both sides of the target field.
        self.padded_target_field_length = self.target_field_length + 2 * self.target_padding
        self.half_target_field_length = self.target_field_length // 2
        self.half_receptive_field_length = int(
            self.receptive_field_length) // 2
        self.num_residual_blocks = len(self.dilations) * self.num_stacks
        # Alternatives left commented out in the original:
        # keras.layers.Activation('selu') and keras.layers.PReLU().
        self.activation = keras.layers.Activation('relu')
        self.samples_of_interest_indices = self.get_padded_target_field_indices(
        )
        self.target_sample_indices = self.get_target_field_indices()

        self.ozer_type = self.config['optimizer']['type']
        self.optimizer = self.get_optimizer()
        self.pit_loss = self.get_pit_loss()
        self.metrics = self.get_metrics()
        self.epoch_num = 0
        self.checkpoints_path = ''
        self.samples_path = ''
        self.history_filename = ''

        # Publish the derived sizes back into the shared config mapping.
        self.config['model']['num_residual_blocks'] = self.num_residual_blocks
        self.config['model'][
            'receptive_field_length'] = self.receptive_field_length
        self.config['model']['input_length'] = self.input_length
        self.config['model']['target_field_length'] = self.target_field_length

        self.model = self.setup_model(load_checkpoint, print_model_summary)
Exemple #5
0
    def __init__(self,
                 config,
                 load_checkpoint=None,
                 input_length=None,
                 target_field_length=None,
                 print_model_summary=False):
        """Derive the network geometry from ``config`` and build the model.

        Args:
            config: Nested configuration mapping. Reads ``training.verbosity``
                and several ``model`` keys; the derived sizes are written back
                into ``config['model']``.
            load_checkpoint: Forwarded unchanged to ``self.setup_model``.
            input_length: Optional explicit input length. Used only when
                ``target_field_length`` is not given; the target field length
                is then derived from it.
            target_field_length: Optional explicit target field length. Takes
                precedence over ``input_length`` and over the configured value.
            print_model_summary: Forwarded unchanged to ``self.setup_model``.
        """
        self.config = config
        self.verbosity = config['training']['verbosity']
        # Original author's note: three stacks are used (num_stacks == 3).
        self.num_stacks = self.config['model']['num_stacks']
        # An int N expands to the dilations 2**0 .. 2**N; a list is used as is.
        # NOTE(review): any other type leaves self.dilations unassigned here.
        if type(self.config['model']['dilations']) is int:
            # Original author's note: 1, 2, 4, ..., 512, repeated three times.
            self.dilations = [
                2**i for i in range(0, self.config['model']['dilations'] + 1)
            ]
        elif type(self.config['model']['dilations']) is list:
            self.dilations = self.config['model']['dilations']
        # Conditioning attributes are not set up in this variant; the original
        # assignments are left commented out below.
        # Original author's notes (unverified): there are 29 classes,
        # presumably speaker identities (28 speakers plus one "unknown"); with
        # binary encoding the condition input length comes out as 5, since
        # 5 bits are enough for 29 classes.
        #self.num_condition_classes = config['dataset']['num_condition_classes']
        #self.condition_input_length = self.get_condition_input_length(self.config['model']['condition_encoding'])
        # Original author's note (unverified): this evaluates to the 6139
        # samples quoted on page 4 of the paper, i.e. the input receptive
        # field needed for one output sample.
        self.receptive_field_length = util.compute_receptive_field_length(
            config['model']['num_stacks'], self.dilations,
            config['model']['filters']['lengths']['res'], 1)

        # Resolve the two coupled lengths from exactly one source. Previously
        # the config fallback was the `else` of the target_field_length check
        # only, so an explicit input_length was silently overwritten whenever
        # target_field_length was omitted. target_field_length is tested first
        # so that it still wins when both arguments are supplied.
        if target_field_length is not None:
            self.target_field_length = target_field_length
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)
        elif input_length is not None:
            self.input_length = input_length
            self.target_field_length = self.input_length - (
                self.receptive_field_length - 1)
        else:
            # Original author's notes (unverified): training takes this branch.
            # The target field size is given directly in the JSON config
            # (1601), so several samples are predicted at once; with a
            # receptive field of 6139 this gives
            # input_length == 6139 + 1601 - 1.
            self.target_field_length = config['model']['target_field_length']
            self.input_length = self.receptive_field_length + (
                self.target_field_length - 1)

        # Original author's note (unverified): padding presumably zero-pads
        # around the data so that input and output lengths match.
        self.target_padding = config['model']['target_padding']
        # Padding is added on both sides of the target field.
        self.padded_target_field_length = self.target_field_length + 2 * self.target_padding
        # `//` is floor division, so integer operands give integer halves.
        self.half_target_field_length = self.target_field_length // 2
        self.half_receptive_field_length = self.receptive_field_length // 2
        # Number of residual blocks: one per dilation, per stack.
        self.num_residual_blocks = len(self.dilations) * self.num_stacks
        self.activation = layers.Activation('relu')
        # Target range including the padding.
        self.samples_of_interest_indices = self.get_padded_target_field_indices(
        )
        # Target range without the padding.
        self.target_sample_indices = self.get_target_field_indices()

        self.optimizer = self.get_optimizer()
        self.out_1_loss = self.get_out_1_loss()
        self.out_2_loss = self.get_out_2_loss()
        self.metrics = self.get_metrics()
        self.epoch_num = 0
        self.checkpoints_path = ''
        self.samples_path = ''
        self.history_filename = ''

        # Publish the derived sizes back into the shared config mapping.
        # Original author's note (unverified): these values are written to the
        # JSON file once training has finished.
        self.config['model']['num_residual_blocks'] = self.num_residual_blocks
        self.config['model'][
            'receptive_field_length'] = self.receptive_field_length
        self.config['model']['input_length'] = self.input_length
        self.config['model']['target_field_length'] = self.target_field_length

        self.model = self.setup_model(load_checkpoint, print_model_summary)