Example #1
    async def login(user, url_name, url_password, captcha=''):
        validate = ''
        challenge = ''
        extra_params = {
            'seccode': f'{validate}|jordan' if validate else '',
            'validate': validate,
            'challenge': challenge,
            'username': url_name,
            'password': url_password,
            'ts': utils.curr_time(),
        }
        params = user.app_sign(extra_params)

        # url_password may contain characters such as '%'; Bilibili expects the
        # raw string, not a percent-encoded "%25".
        # aiohttp's documented behaviour matches this, but
        # https://github.com/aio-libs/aiohttp/blob/10c8ce9567d008d4f92a99ffe45f8d0878e99275/aiohttp/client_reqrep.py#L215-L219
        # runs into a yarl compatibility issue,
        # so the query string is built by hand.
        params_str = utils.prepare_params(params)
        url_aiohttp = f'https://passport.bilibili.com/x/passport-login/oauth2/login?{params_str}'
        json_rsp = await user.login_session.request_json(
            'POST',
            url_aiohttp,
            headers=user.app.headers,
            params=None,
            ctrl=LOGIN_CTRL)
        return json_rsp
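utils.prepare_params itself is not shown above. A minimal sketch of what such a helper could look like, assuming its only job is to join the already-signed parameters into a query string without percent-encoding (which is the point of the manual handling above):

    def prepare_params(params: dict) -> str:
        # Join key=value pairs directly and deliberately skip percent-encoding,
        # so raw characters such as '%' in url_password reach the server untouched.
        return '&'.join(f'{key}={value}' for key, value in params.items())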
Example #2
    async def refresh_token(user):
        dict_cookie = dict()
        for param in user.dict_user['cookie'].split(';'):
            # split on the first '=' only; cookie values may themselves contain '='
            key, value = param.strip().split('=', 1)
            dict_cookie[key] = value

        extra_params = {
            'access_key': user.dict_user['access_key'],
            'access_token': user.dict_user['access_key'],
            'refresh_token': user.dict_user['refresh_token'],
            'ts': utils.curr_time(),
            **dict_cookie
        }
        params = user.app_sign(extra_params)
        # Unavoidable here: the cookie values contain special characters; this is unrelated to the yarl compatibility issue.
        params_str = utils.prepare_params(params)
        url = f'https://passport.bilibili.com/api/v2/oauth2/refresh_token?{params_str}'
        json_rsp = await user.login_session.request_json(
            'POST',
            url,
            headers=user.app.headers,
            params=None,
            ctrl=LOGIN_CTRL)
        print('json_rsp', json_rsp)
        return json_rsp
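Both examples call user.app_sign(extra_params) before serializing. Its implementation is not shown here; the sketch below follows the widely documented Bilibili app-signing scheme (sort the parameters, urlencode them, append the app secret, attach the MD5 hex digest as 'sign') and is an assumption, not the repo's actual code:

    import hashlib
    from urllib.parse import urlencode

    def app_sign(params: dict, appkey: str, appsec: str) -> dict:
        # Assumed scheme: add the appkey, sort by key, urlencode, append the
        # secret, and attach the MD5 hex digest as 'sign'.
        params = dict(params, appkey=appkey)
        query = urlencode(sorted(params.items()))
        params['sign'] = hashlib.md5((query + appsec).encode('utf-8')).hexdigest()
        return params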
Example #3
def main():
    dl = DataLoader(
        source_path='../temp/letters_source.txt',
        target_path='../temp/letters_target.txt')
    sources, targets = dl.load()
    
    tf_estimator = tf.estimator.Estimator(
        tf_estimator_model_fn, params=prepare_params(dl), model_dir=args.model_dir)
    
    for epoch in range(args.num_epochs):
        tf_estimator.train(tf.estimator.inputs.numpy_input_fn(
            x={'source': sources, 'target': targets},
            batch_size=args.batch_size,
            num_epochs=None,
            shuffle=True), steps=1000)
        greedy_decode(['apple', 'common', 'zhedong'], tf_estimator, dl)
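In this example prepare_params plays a different role: it packs data-dependent settings from the DataLoader into the params dict handed to tf_estimator_model_fn. A plausible sketch, assuming the model_fn needs the vocabulary sizes; the exact keys, and args.hidden_dim, are hypothetical:

    def prepare_params(dl):
        # Everything tf_estimator_model_fn needs at graph-construction time;
        # which keys it actually reads depends on the model_fn.
        return {
            'source_vocab_size': len(dl.source_word2idx),
            'target_vocab_size': len(dl.target_word2idx),
            'target_word2idx': dl.target_word2idx,
            'hidden_dim': args.hidden_dim,
        }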
Example #4
def main():
    dl = DataLoader(source_path='../temp/dialog_source.txt',
                    target_path='../temp/dialog_target.txt')
    sources, targets = dl.load()
    print('Source Vocab Size:', len(dl.source_word2idx))
    print('Target Vocab Size:', len(dl.target_word2idx))

    tf_estimator = tf.estimator.Estimator(tf_estimator_model_fn,
                                          params=prepare_params(dl))

    for epoch in range(1):
        tf_estimator.train(tf.estimator.inputs.numpy_input_fn(
            x={'source': sources, 'target': targets},
            batch_size=args.batch_size,
            num_epochs=1,
            shuffle=True))
        greedy_decode(['你是谁', '你喜欢我吗', '给我唱一首歌', '我帅吗'], tf_estimator, dl)
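greedy_decode is likewise not shown. A rough sketch under the assumption that the estimator's predict mode yields one array of decoded target ids per input; the <pad> and <unk> tokens and the target_idx2word mapping are assumptions:

    import numpy as np

    def greedy_decode(test_words, tf_estimator, dl, max_len=20):
        pad = dl.source_word2idx['<pad>']
        unk = dl.source_word2idx['<unk>']
        # Index each input string and pad (or truncate) it to a fixed length.
        x = np.array([[dl.source_word2idx.get(c, unk) for c in w[:max_len]]
                      + [pad] * (max_len - len(w)) for w in test_words],
                     dtype=np.int32)
        input_fn = tf.estimator.inputs.numpy_input_fn(x={'source': x}, shuffle=False)
        for w, ids in zip(test_words, tf_estimator.predict(input_fn)):
            print(w, '->', ''.join(dl.target_idx2word[i] for i in ids))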
Example #5
    def fit_predict(self, **kwargs) -> float:
        """
        The fit_predict method is written this way so that it can act as a black box
        for the Bayesian optimization library. It accepts the hyperparameters and
        creates the predictions with them.

        It keeps track of 3 metrics (MAE, RMSE and threshold MAE) using KFold
        cross-validation and one or both of the algorithms.

        This approach was used (instead of the cv utilities included in the packages)
        to get and combine predictions at the cross-validation stage. It also allows
        different kinds of manipulations of the data prior to prediction.

        kwargs: dict
            key / value pairs of the hyperparameters of xgb and/or lgb

        Returns
        -------
            float
                The selected metric to be optimized.
        """
        rmse_final = 0.0
        mae_final = 0.0
        mae_thresh_final = 0.0

        rmse_final_train = 0.0
        mae_final_train = 0.0
        mae_thresh_final_train = 0.0

        self.params_xgb = prepare_params(self.params_xgb, self.config, kwargs, 'xgb')
        self.params_lgb = prepare_params(self.params_lgb, self.config, kwargs, 'lgb')

        folds = KFold(n_splits=self.config['n_fold'],
                      shuffle=self.config['shuffle'],
                      random_state=self.config['random_state'])

        for train_index, valid_index in folds.split(self.X, self.y):
            X_train, X_valid = self.X.iloc[train_index], self.X.iloc[valid_index]
            y_train, y_valid = self.y.iloc[train_index], self.y.iloc[valid_index]

            if self.config['algorithm'] in ['xgb', 'both']:
                xgb_train = xgb.DMatrix(X_train, y_train)
                xgb_valid = xgb.DMatrix(X_valid, y_valid)

                bst_xgb = xgb.train(self.params_xgb,
                                    xgb_train,
                                    num_boost_round=self.config['num_boost_round'],
                                    early_stopping_rounds=self.config['early_stopping_rounds'],
                                    verbose_eval=self.config['verbose'],
                                    evals=[(xgb_valid, 'eval')])
                
                xgb_best_iteration = bst_xgb.best_iteration
                
                xgb_importance = bst_xgb.get_score(importance_type='total_gain')
                self.feature_importance['gain_xgb'] += self.feature_importance['feature'].apply(
                    lambda x: xgb_importance.get(x, 0.0))
                
                xgb_valid_pred = bst_xgb.predict(xgb.DMatrix(X_valid), ntree_limit=xgb_best_iteration)
                xgb_train_pred = bst_xgb.predict(xgb.DMatrix(X_train), ntree_limit=xgb_best_iteration)


            if self.config['algorithm'] in ['lgb', 'both']: 
                lgb_train = lgb.Dataset(X_train, y_train)
                lgb_valid = lgb.Dataset(X_valid, y_valid)

                bst_lgb = lgb.train(self.params_lgb,
                                    lgb_train,
                                    num_boost_round=self.config['num_boost_round'],
                                    early_stopping_rounds=self.config['early_stopping_rounds'],
                                    verbose_eval=self.config['verbose_eval'],
                                    valid_sets=[lgb_valid])

                lgb_best_iteration = bst_lgb.best_iteration

                self.feature_importance['gain_lgb'] += np.array(bst_lgb.feature_importance(importance_type='gain'))
                
                lgb_valid_pred = bst_lgb.predict(X_valid, num_iteration=lgb_best_iteration)
                lgb_train_pred = bst_lgb.predict(X_train, num_iteration=lgb_best_iteration)

            if self.config['algorithm'] == 'xgb':
                y_pred = xgb_valid_pred
                y_pred_train = xgb_train_pred
            elif self.config['algorithm'] == 'lgb':
                y_pred = lgb_valid_pred
                y_pred_train = lgb_train_pred
            elif self.config['algorithm'] == 'both':
                balance = kwargs['balance']
                y_pred = balance * xgb_valid_pred + (1.0 - balance) * lgb_valid_pred
                y_pred_train = balance * xgb_train_pred + (1.0 - balance) * lgb_train_pred

            valid_mae_thresh = mae_thresh(y_valid, y_pred, thresh=self.config['threshold'])
            valid_rmse = rmse(y_valid, y_pred)
            valid_mae = mean_absolute_error(y_valid, y_pred)

            mae_thresh_final += valid_mae_thresh / self.config['n_fold']
            rmse_final += valid_rmse / self.config['n_fold']
            mae_final += valid_mae / self.config['n_fold']
            
            train_mae_thresh = mae_thresh(y_train, y_pred_train, thresh=self.config['threshold'])
            train_rmse = rmse(y_train, y_pred_train)
            train_mae = mean_absolute_error(y_train, y_pred_train)

            mae_thresh_final_train += train_mae_thresh / self.config['n_fold']
            rmse_final_train += train_rmse / self.config['n_fold']
            mae_final_train += train_mae / self.config['n_fold']

        self.count += 1

        print(f'Iteration {self.count} - Validation / Train - '
              f'MAE: {mae_final:.3f} / {mae_final_train:.3f}, '
              f'RMSE: {rmse_final:.3f} / {rmse_final_train:.3f}, '
              f"MAE above {self.config['threshold']}: {mae_thresh_final:.3f} / {mae_thresh_final_train:.3f}")
        self.feature_importance.to_csv(self.config['feat_imp_path'], index=False)

        if self.config['bo_optimize'] == 'rmse': result = -rmse_final
        elif self.config['bo_optimize'] == 'mae': result = -mae_final
        elif self.config['bo_optimize'] == 'mae_t': result = -mae_thresh_final
        else: raise ValueError(f"Unknown bo_optimize metric: {self.config['bo_optimize']}")

        return result
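Here prepare_params merges the optimizer-suggested kwargs into the base parameter dict of one booster. Its implementation is not shown; the sketch below assumes the kwargs keys carry an algorithm prefix (e.g. xgb_max_depth) and that the config lists which hyperparameters are integers; both are assumptions:

    def prepare_params(base_params, config, kwargs, algorithm):
        # Overlay the Bayesian-optimization suggestions on the base params of
        # one booster. BO proposes floats, so integer-valued parameters are
        # cast back; the 'int_params' config key is hypothetical.
        params = dict(base_params)
        prefix = algorithm + '_'
        for key, value in kwargs.items():
            if not key.startswith(prefix):
                continue          # skip the other booster's keys and 'balance'
            name = key[len(prefix):]
            if name in config.get('int_params', []):
                value = int(round(value))
            params[name] = value
        return params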