Example #1
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        hid_layers: list containing dimensions of the hidden layers
        hid_layers_activation: activation function for the hidden layers
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super(MLPNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(self, dict(
            init_fn=None,
            clip_grad_val=None,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_scheduler_spec=None,
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
        util.set_attr(self, self.net_spec, [
            'shared',
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        dims = [self.in_dim] + self.hid_layers
        self.model = net_util.build_fc_model(dims, self.hid_layers_activation)
        # add last layer with no activation
        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = nn.Linear(dims[-1], self.out_dim)
        else:
            self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim])

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self, self.lr_scheduler_spec)
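A minimal sketch of how a constructor like this might be exercised, assuming an MLPNet class exposing this __init__ and a net_spec dict whose keys match the attributes pulled in by util.set_attr above; the concrete values and the in/out dimensions are illustrative only.

# hypothetical spec; keys mirror the attribute list passed to util.set_attr
net_spec = {
    'shared': False,
    'hid_layers': [64, 64],
    'hid_layers_activation': 'relu',
    'loss_spec': {'name': 'MSELoss'},
    'optim_spec': {'name': 'Adam', 'lr': 0.001},
    'gpu': False,
}
net = MLPNet(net_spec, in_dim=4, out_dim=2)  # e.g. a 4-dim state and 2 discrete actions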
Example #2
def get_policy_out_dim(body):
    '''Helper method to construct the policy network out_dim for a body according to is_discrete, action_type'''
    action_dim = body.action_dim
    if body.is_discrete:
        if body.action_type == 'multi_discrete':
            assert ps.is_list(action_dim), action_dim
            policy_out_dim = action_dim
        else:
            assert ps.is_integer(action_dim), action_dim
            policy_out_dim = action_dim
    else:
        assert ps.is_integer(action_dim), action_dim
        if action_dim == 1:  # single action, use [loc, scale]
            policy_out_dim = 2
        else:  # multi-action, use [locs], [scales]
            policy_out_dim = [action_dim, action_dim]
    return policy_out_dim
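To make the branching concrete, a hedged sketch of calling this version with a stand-in body object; SimpleNamespace here is only a placeholder for whatever object carries action_dim, is_discrete and action_type, and the values are illustrative.

from types import SimpleNamespace

# continuous control with 3 action dimensions
body = SimpleNamespace(action_dim=3, is_discrete=False, action_type='continuous')
print(get_policy_out_dim(body))  # [3, 3] -> one tail for locs, one for scales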
Example #3
def get_policy_out_dim(body):
    '''Helper method to construct the policy network out_dim for a body according to is_discrete, action_type'''
    if body.is_discrete:
        if body.action_type == 'multi_discrete':
            assert ps.is_list(body.action_dim), body.action_dim
            policy_out_dim = body.action_dim
        else:
            assert ps.is_integer(body.action_dim), body.action_dim
            policy_out_dim = body.action_dim
    else:
        if body.action_type == 'multi_continuous':
            assert ps.is_list(body.action_dim), body.action_dim
            raise NotImplementedError('multi_continuous not supported yet')
        else:
            assert ps.is_integer(body.action_dim), body.action_dim
            if body.action_dim == 1:
                policy_out_dim = 2  # singleton stays as an int
            else:
                policy_out_dim = body.action_dim * [2]
    return policy_out_dim
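This variant lays out the continuous tails differently from Example #2: each action dimension gets its own [loc, scale] pair. A hedged sketch with the same stand-in body as above:

body = SimpleNamespace(action_dim=3, is_discrete=False, action_type='continuous')
print(get_policy_out_dim(body))  # [2, 2, 2] -> a [loc, scale] tail per action dimension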
Example #4
def test_is_integer(case, expected):
    assert _.is_integer(case) == expected
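The parametrization for this test is not shown. A hedged sketch of how the case/expected pairs might be supplied, assuming `_` is pydash and pytest is the runner; the specific cases are illustrative.

import pydash as _
import pytest

@pytest.mark.parametrize('case,expected', [
    (0, True),
    (1, True),
    (-5, True),
    (1.5, False),
    ('1', False),
    (None, False),
])
def test_is_integer(case, expected):
    assert _.is_integer(case) == expected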
Example #5
def file_upload_batch():
    try:
        fileHandler = FileHandler()
        # {
        # files:[{binary1}, {binary2}]
        # files_info: {sub_dir: '', file_key: '', curr_chunk: 1, total_chunks: 10, md5: 'abcdefghijklmn' }
        # | [
        #   {file_id: 1,sub_dir: '', file_key: '', curr_chunk: 1, total_chunks: 10, md5: 'abcdefghijklmn'},
        #   {file_id: 2,sub_dir: '', file_key: '', curr_chunk: 2, total_chunks: 10, md5: 'abcdefghijklmn'}
        # ]
        # }
        # list of files or chunks; small complete files and partial chunks may be mixed
        files = []
        for f in request.files:
            files.append(request.files[f])

        # file info
        original_files_info = request.form.get('files_info')
        original_files_info = json.loads(original_files_info) if isinstance(
            original_files_info, str) else original_files_info
        files_info = original_files_info
        if not files:
            return standard_expection('File list is empty.')
        # supports uploading a batch of chunks or a single file at a time
        if not isinstance(files_info, (list, dict)):
            return standard_expection('Unable to parse file info.')
        if isinstance(files_info, list) and len(files_info) != len(files):
            return standard_expection('File info does not match the file objects.')
        if isinstance(files_info, dict) and len(files) != 1:
            return standard_expection('File info does not match the file objects.')

        tenant_id = g.tenant_id if hasattr(g, 'tenant_id') else 0
        user_id = g.user_id if hasattr(g, 'user_id') else 0
        cache_expired_time = current_app.config['REDIS_CACHE_EXPIRE_FILE']

        files_info = [files_info] if isinstance(files_info,
                                                dict) else files_info

        if _.some(
                files_info,
                lambda x: x.get('total_chunks') is None or
                (x.get('curr_chunk') is not None and
                 not _.is_integer(x.get('curr_chunk')))):
            return standard_expection('Malformed file info (files_info).')

        file_groups = _.group_by(files_info, lambda x: x.get('file_key'))
        files_set = {}
        for f in files:
            files_set[f.name] = f

        msg_list = []
        md5_error_list = []
        from app import aios_redis
        index = 0
        for file_key, file_partations_info in file_groups.items():
            # file_key: unique identifier for each file; if chunked, all chunks of a file share the same file_key
            # file_partations_info: a group of chunks (possibly part of one large file) or the info of a single complete file
            for file_partation in file_partations_info:
                id = file_partation.get('id')
                # original file name
                file_name = file_partation.get('file_name')
                # default directory convention: <module>/<tenant>/<custom dir>/original file name
                sub_dir = file_partation.get(
                    'sub_dir', os.path.join('cs', str(tenant_id), file_key))
                # curr_chunk starts at 1
                curr_chunk = file_partation.get('curr_chunk')
                # total number of chunks for the file
                total_chunks = file_partation.get('total_chunks')
                # md5 value; if present, used for checksum verification
                md5 = file_partation.get('md5')
                # file object
                file = files_set[id]
                # file = files[index]
                index += 1

                absolute_dir_path = fileHandler.get_standard_sub_dir(sub_dir)
                absolute_file_path = os.path.join(absolute_dir_path,
                                                  f'{file_key}.{curr_chunk}')

                # guard against repeated re-uploads
                # is_valid = True
                # for f in os.listdir(absolute_dir_path):
                #     # the file name may contain '-'
                #     if '-' in f.split('.')[1]:
                #         [(start, end)] = re.findall('.*\.(\d+)-(\d+)$', f)
                #         if int(start) <= int(curr_chunk) <= int(end):
                #             is_valid = False
                #             print('=====chunk already exists', f, f'{file_key}.{curr_chunk}')
                #             if os.path.exists(absolute_file_path):
                #                 os.rename(absolute_file_path, absolute_file_path + '.deleted')
                #             continue
                # if is_valid is False:
                #     continue
                if os.path.exists(absolute_file_path):
                    os.remove(absolute_file_path)
                file.save(absolute_file_path)

                # file md5 verification
                # if md5:
                #     is_valid, msg = fileHandler.valid_md5(absolute_file_path, md5)
                #     if not is_valid:
                #         md5_error_list.append({'file_key': file_key, 'curr_chunk': curr_chunk})
                #         continue

                aios_redis.set(f'plus_uploader:{file_key}:{curr_chunk}',
                               'done', cache_expired_time)

                fileHandler.log_print(file_key, curr_chunk,
                                      f'{curr_chunk}/{total_chunks}')

                # publish a message so the background thread tries to merge the file
                msg = {
                    'file_key': file_key,
                    'dir_path': absolute_dir_path,
                    'curr_chunk': curr_chunk,
                    'total_chunks': total_chunks,
                    'file_name': file_name,
                    'tenant_id': tenant_id,
                    'user_id': user_id,
                    'cache_expired_time': cache_expired_time
                }
                msg_list.append(msg)

        if len(md5_error_list):
            print('File MD5 verification failed')
            return standard_expection(json.dumps(md5_error_list))

        succ_list, err_list = fileHandler.multi_process_handler(msg_list)
        if len(err_list):
            print('File merge failed')
            return standard_expection(json.dumps(err_list))

        partations_info = []
        # directory inside the container
        container_dir = os.path.join(os.getenv('FLASK_CONFIG'),
                                     sub_dir.strip(os.path.sep)).replace(
                                         os.path.sep, '/')
        # collect the completion status and storage directory for each chunk
        for succ in succ_list:
            partations_info.append({
                'file_key': succ['file_key'],
                'curr_chunk': succ['curr_chunk'],
                'status': True,
                'host': container_dir,
                'msg': 'ok'
            })
        print('<success>', original_files_info)
        return standard_response(partations_info, 200)
    except Exception as err:
        import traceback
        traceback.print_exc()
        print('<failure>')
        return standard_expection(str(err))
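A hedged sketch of what a client call to this endpoint might look like, assuming the route is mounted at a URL such as /file/upload/batch (the real URL is not shown) and following the files_info layout documented in the comment block at the top of the handler. Note that each entry's 'id' must match the multipart field name of the corresponding file part, since the handler looks files up via files_set[id]. All names and values are illustrative.

import json
import requests

# hypothetical single-chunk upload; 'id' matches the multipart field name below
files_info = [{
    'id': 'chunk_1',
    'file_name': 'report.pdf',
    'file_key': 'abc123',     # shared by every chunk of the same file
    'curr_chunk': 1,          # chunk index, starting at 1
    'total_chunks': 10,
    'md5': 'abcdefghijklmn',
}]
resp = requests.post(
    'http://localhost:5000/file/upload/batch',   # hypothetical URL
    data={'files_info': json.dumps(files_info)},
    files={'chunk_1': open('report.pdf.part1', 'rb')},
)
print(resp.json())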
Example #6
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        conv_hid_layers: list containing dimensions of the convolutional hidden layers, each is a list representing hid_layer = out_d, kernel, stride, padding, dilation.
            Assumed to all come before the flat layers.
            Note: a convolutional layer should specify the in_channels, out_channels, kernel_size, stride (of kernel steps), padding, and dilation (spacing between kernel points), e.g. [3, 16, (5, 5), 1, 0, (2, 2)]
            For more details, see http://pytorch.org/docs/master/nn.html#conv2d and https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
        fc_hid_layers: list of fc layers following the convolutional layers
        hid_layers_activation: activation function for the hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        init_fn: weight initialization function
        normalize: whether to divide by 255.0 to normalize image input
        batch_norm: whether to add batch normalization after each convolutional layer, excluding the input layer.
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        assert len(in_dim) == 3  # image shape (c,w,h)
        nn.Module.__init__(self)
        super().__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                init_fn=None,
                normalize=False,
                batch_norm=True,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'conv_hid_layers',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'init_fn',
            'normalize',
            'batch_norm',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        # conv body
        self.conv_model = self.build_conv_layers(self.conv_hid_layers)
        self.conv_out_dim = self.get_conv_output_size()

        # fc body
        if ps.is_empty(self.fc_hid_layers):
            tail_in_dim = self.conv_out_dim
        else:
            # fc body from flattened conv
            self.fc_model = net_util.build_fc_model([self.conv_out_dim] +
                                                    self.fc_hid_layers,
                                                    self.hid_layers_activation)
            tail_in_dim = self.fc_hid_layers[-1]

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model(
                [tail_in_dim, self.out_dim], self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim,
                                        self.out_layer_activation):
                tail = net_util.build_fc_model([tail_in_dim, out_d], out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.to(self.device)
        self.train()
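A minimal sketch of a spec that would drive this constructor, assuming a ConvNet class exposing this __init__ and following the 6-element conv layer example given in the docstring; the layer shapes, the (3, 84, 84) input and the 4-action output are illustrative only.

# hypothetical spec; each conv layer is [in_c, out_c, kernel, stride, padding, dilation]
net_spec = {
    'conv_hid_layers': [
        [3, 16, (8, 8), 4, 0, 1],
        [16, 32, (4, 4), 2, 0, 1],
    ],
    'fc_hid_layers': [256],
    'hid_layers_activation': 'relu',
    'normalize': True,
    'batch_norm': False,
    'gpu': False,
}
net = ConvNet(net_spec, in_dim=(3, 84, 84), out_dim=4)  # e.g. 4 discrete actions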
Example #7
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        cell_type: any of RNN, LSTM, GRU
        fc_hid_layers: list of fc layers preceding the RNN layers
        hid_layers_activation: activation function for the fc hidden layers
        out_layer_activation: activation function for the output layer, same shape as out_dim
        rnn_hidden_size: rnn hidden_size
        rnn_num_layers: number of recurrent layers
        bidirectional: if RNN should be bidirectional
        seq_len: length of the history sequence passed to the net
        init_fn: weight initialization function
        clip_grad_val: clip gradient norm if value is not None
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_scheduler_spec: Pytorch optim.lr_scheduler
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super(RecurrentNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(
            self,
            dict(
                out_layer_activation=None,
                cell_type='GRU',
                rnn_num_layers=1,
                bidirectional=False,
                init_fn=None,
                clip_grad_val=None,
                loss_spec={'name': 'MSELoss'},
                optim_spec={'name': 'Adam'},
                lr_scheduler_spec=None,
                update_type='replace',
                update_frequency=1,
                polyak_coef=0.0,
                gpu=False,
            ))
        util.set_attr(self, self.net_spec, [
            'cell_type',
            'fc_hid_layers',
            'hid_layers_activation',
            'out_layer_activation',
            'rnn_hidden_size',
            'rnn_num_layers',
            'bidirectional',
            'seq_len',
            'init_fn',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_scheduler_spec',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])
        # fc body: state processing model
        if ps.is_empty(self.fc_hid_layers):
            self.rnn_input_dim = self.in_dim
        else:
            fc_dims = [self.in_dim] + self.fc_hid_layers
            self.fc_model = net_util.build_fc_model(fc_dims,
                                                    self.hid_layers_activation)
            self.rnn_input_dim = fc_dims[-1]

        # RNN model
        self.rnn_model = getattr(nn, net_util.get_nn_name(self.cell_type))(
            input_size=self.rnn_input_dim,
            hidden_size=self.rnn_hidden_size,
            num_layers=self.rnn_num_layers,
            batch_first=True,
            bidirectional=self.bidirectional)

        # tails. avoid list for single-tail for compute speed
        if ps.is_integer(self.out_dim):
            self.model_tail = net_util.build_fc_model(
                [self.rnn_hidden_size, self.out_dim],
                self.out_layer_activation)
        else:
            if not ps.is_list(self.out_layer_activation):
                self.out_layer_activation = [self.out_layer_activation] * len(out_dim)
            assert len(self.out_layer_activation) == len(self.out_dim)
            tails = []
            for out_d, out_activ in zip(self.out_dim,
                                        self.out_layer_activation):
                tail = net_util.build_fc_model([self.rnn_hidden_size, out_d],
                                               out_activ)
                tails.append(tail)
            self.model_tails = nn.ModuleList(tails)

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_scheduler = net_util.get_lr_scheduler(self,
                                                      self.lr_scheduler_spec)
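A minimal sketch of a spec exercising this constructor, assuming a RecurrentNet class exposing this __init__; all sizes and values are illustrative only.

# hypothetical spec; rnn_hidden_size and seq_len are required by the attribute list above
net_spec = {
    'cell_type': 'GRU',
    'fc_hid_layers': [64],
    'hid_layers_activation': 'relu',
    'rnn_hidden_size': 64,
    'rnn_num_layers': 1,
    'bidirectional': False,
    'seq_len': 4,
    'gpu': False,
}
net = RecurrentNet(net_spec, in_dim=4, out_dim=2)  # e.g. 4-dim state, 2 discrete actions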
Example #8
    def __init__(self, net_spec, in_dim, out_dim):
        '''
        net_spec:
        hid_layers: list containing dimensions of the hidden layers
        hid_layers_activation: activation function for the hidden layers
        init_fn: weight initialization function
        clip_grad: whether to clip the gradient
        clip_grad_val: the clip value
        loss_spec: measure of error between model predictions and correct outputs
        optim_spec: parameters for initializing the optimizer
        lr_decay: function to decay learning rate
        lr_decay_frequency: how many total timesteps per decay
        lr_decay_min_timestep: minimum amount of total timesteps before starting decay
        lr_anneal_timestep: timestep to anneal lr decay
        update_type: method to update network weights: 'replace' or 'polyak'
        update_frequency: how many total timesteps per update
        polyak_coef: ratio of polyak weight update
        gpu: whether to train using a GPU. Note this will only work if a GPU is available, otherwise setting gpu=True does nothing
        '''
        nn.Module.__init__(self)
        super(MLPNet, self).__init__(net_spec, in_dim, out_dim)
        # set default
        util.set_attr(self, dict(
            init_fn='xavier_uniform_',
            clip_grad=False,
            clip_grad_val=1.0,
            loss_spec={'name': 'MSELoss'},
            optim_spec={'name': 'Adam'},
            lr_decay='no_decay',
            update_type='replace',
            update_frequency=1,
            polyak_coef=0.0,
            gpu=False,
        ))
        util.set_attr(self, self.net_spec, [
            'separate',
            'hid_layers',
            'hid_layers_activation',
            'init_fn',
            'clip_grad',
            'clip_grad_val',
            'loss_spec',
            'optim_spec',
            'lr_decay',
            'lr_decay_frequency',
            'lr_decay_min_timestep',
            'lr_anneal_timestep',
            'update_type',
            'update_frequency',
            'polyak_coef',
            'gpu',
        ])

        dims = [self.in_dim] + self.hid_layers
        self.model = net_util.build_sequential(dims, self.hid_layers_activation)
        # add last layer with no activation
        if ps.is_integer(self.out_dim):
            self.model.add_module(str(len(self.model)), nn.Linear(dims[-1], self.out_dim))
        else:  # if more than 1 output, add last layer as tails separate from main model
            self.model_tails = nn.ModuleList([nn.Linear(dims[-1], out_d) for out_d in self.out_dim])

        net_util.init_layers(self, self.init_fn)
        for module in self.modules():
            module.to(self.device)
        self.loss_fn = net_util.get_loss_fn(self, self.loss_spec)
        self.optim = net_util.get_optim(self, self.optim_spec)
        self.lr_decay = getattr(net_util, self.lr_decay)
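For contrast with Example #1, a hedged sketch of a spec for this older variant, which uses clip_grad/lr_decay keys rather than clip_grad_val/lr_scheduler_spec; names and values are illustrative, and the lr_decay string must name a function that actually exists in net_util (the documented default 'no_decay' is used here).

# hypothetical spec for the older API; note lr_decay instead of lr_scheduler_spec
net_spec = {
    'hid_layers': [32],
    'hid_layers_activation': 'tanh',
    'clip_grad': False,
    'clip_grad_val': 1.0,
    'lr_decay': 'no_decay',
    'update_type': 'replace',
    'gpu': False,
}
net = MLPNet(net_spec, in_dim=4, out_dim=2)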