Example #1
File: ssh.py Project: Asdil/Asdil
    def putFile(self, file_path, remote_path, iscut=False):
        logger = None
        if self.log:
            logger = log.init_log(self.log_path)

        scp = SCPClient(self.ssh.get_transport())
        if not os.path.exists(file_path):
            print(f'{file_path} does not exist')
            if self.log:
                logger.info(f'{file_path} does not exist')
            return

        _, _, _, file_name = tool.splitPath(file_path)
        _, _, _, remote_name = tool.splitPath(remote_path)

        # remote_path may be just a directory path; append the file name if so
        if remote_name != file_name:
            remote_path = tool.pathJoin(remote_path, file_name)

        scp.put(file_path, remote_path)
        scp.close()

        if iscut:
            # cut mode: remove the local copy after a successful upload
            os.remove(file_path)
        print(f'{file_path} -----> {self.ip_name}{remote_path}')
        if self.log:
            logger.info(f'{file_path} -----> {self.ip_name}{remote_path}')
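
tool.splitPath is unpacked into four values throughout these examples but is never shown. A minimal sketch consistent with how the results are used, assuming it returns (directory, stem, extension, file name) in that order (the names below are hypothetical):

import os

def splitPath(path):
    # Hypothetical reconstruction: '/data/rs123.npy' -> ('/data', 'rs123', '.npy', 'rs123.npy')
    directory, file_name = os.path.split(path)
    stem, ext = os.path.splitext(file_name)
    return directory, stem, ext, file_name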
Example #2
def cutAllFiles(srcfile, dstfile, key=None, isreplace=False):
    if key is None:
        files = tool.getFiles(srcfile)
    else:
        files = tool.getFiles(srcfile, key=key)
    if isreplace:
        for _file in files:
            if isFile(_file):
                _, _, _, name = tool.splitPath(_file)
                # remove any same-named file already at the destination
                if isExist(tool.pathJoin(dstfile, name)):
                    delFile(tool.pathJoin(dstfile, name))
                tool.cutFile(_file, dstfile)
            else:
                _, _, _, name = tool.splitPath(_file)
                # remove any same-named directory already at the destination
                if isExist(tool.pathJoin(dstfile, name)):
                    delDir(tool.pathJoin(dstfile, name))
                shutil.move(_file, dstfile + f'/{name}')
                print(f'move {_file} -> {dstfile}/{name}')
    else:
        for _file in files:
            if isFile(_file):
                tool.cutFile(_file, dstfile)
            else:
                _, _, _, name = tool.splitPath(_file)
                shutil.move(_file, dstfile + f'/{name}')
                print(f'move {_file} -> {dstfile}/{name}')
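
A short usage sketch of cutAllFiles; the directories and the '.csv' filter are hypothetical, and key is assumed to be a filename filter passed through to tool.getFiles:

# Move every matching file out of /data/src into /data/dst, deleting any
# same-named file or directory already present at the destination.
cutAllFiles('/data/src', '/data/dst', key='.csv', isreplace=True)

# Move everything and leave existing destination entries untouched.
cutAllFiles('/data/src', '/data/dst')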
Example #3
File: ssh.py Project: Asdil/Asdil
    def getFile(self, file_path, remote_path):
        logger = None
        if self.log:
            logger = log.init_log(self.log_path)
        scp = SCPClient(self.ssh.get_transport())
        _, _, _, file_name = tool.splitPath(file_path)
        _, _, _, remote_name = tool.splitPath(remote_path)
        # remote_path may be just a directory path; append the file name if so
        if remote_name != file_name:
            remote_path = tool.pathJoin(remote_path, file_name)
        scp.get(file_path, remote_path)
        scp.close()
        print(f'{self.ip_name}{file_path} -----> {remote_path}')
        if self.log:
            logger.info(f'{self.ip_name}{file_path} -----> {remote_path}')
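
putFile and getFile belong to an SSH wrapper class whose constructor is not shown in these examples. A usage sketch under the assumption that the class is built from connection details plus the log switch and log path the methods rely on (every argument name below is hypothetical):

ssh = SSH(ip='192.168.1.10', port=22, username='user', password='secret',
          log=True, log_path='/tmp/ssh.log')             # hypothetical constructor
ssh.putFile('/local/data/result.csv', '/remote/data/')   # upload; iscut=True would delete the local copy
ssh.getFile('/remote/data/result.csv', '/local/backup/') # download into a local directory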
Example #4
def LGB(argsDict):
    num_leaves = argsDict["num_leaves"] + 25
    max_depth = argsDict["max_depth"]
    learning_rate = argsDict["learning_rate"] * 0.02 + 0.05
    n_estimators = argsDict['n_estimators'] * 10 + 50
    min_child_weight = argsDict['min_child_weight']
    min_child_samples = argsDict['min_child_samples'] + 18
    subsample = argsDict["subsample"] * 0.1 + 0.7
    colsample_bytree = argsDict["colsample_bytree"]
    reg_alpha = argsDict["reg_alpha"]
    reg_lambda = argsDict["reg_lambda"]
    path = argsDict['path']
    data = np.load(path)
    data = data.astype('float32')
    data[data == 2] = 0.5  # recode entries equal to 2 as 0.5
    X, Y = data[:, :-1], data[:, -1]  # last column is the label
    _, rsid, _, _ = tool.splitPath(path)
    gbm = LGBMClassifier(device='gpu',
                         gpu_platform_id=0,
                         gpu_device_id=0,
                         max_bin=255,
                         num_leaves=num_leaves,
                         max_depth=max_depth,
                         learning_rate=learning_rate,
                         n_estimators=n_estimators,
                         min_child_weight=min_child_weight,
                         min_child_samples=min_child_samples,
                         subsample=subsample,
                         colsample_bytree=colsample_bytree,
                         reg_alpha=reg_alpha,
                         reg_lambda=reg_lambda,
                         n_jobs=1)
    # kfold = StratifiedKFold(n_splits=5, random_state=42)
    kfold = StratifiedKFold(n_splits=5)
    metric = cross_val_score(gbm, X, Y, cv=kfold, scoring="roc_auc").mean()
    logger = log.init_log()
    logger.info(f"{rsid} training score: {metric}")
    print(f"{rsid} training score: {metric}")
    return -metric  # hyperopt minimizes, so return the negated AUC
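
LGB is written as a hyperopt objective: argsDict carries the raw samples from the search space (see Example #6), the first lines rescale them into real hyperparameters, and the negated cross-validated AUC is returned because fmin minimizes. It can also be called directly with a hand-built dictionary; the raw values and path below are hypothetical:

args = {"num_leaves": 3, "max_depth": 12, "learning_rate": 1.0,
        "n_estimators": 2, "min_child_weight": 0.005, "min_child_samples": 5,
        "subsample": 2, "colsample_bytree": 0.8, "reg_alpha": 0.01,
        "reg_lambda": 0.1, "path": "/data/snp/rs123.npy"}  # hypothetical .npy path
loss = LGB(args)  # e.g. num_leaves becomes 3 + 25 = 28, n_estimators 2 * 10 + 50 = 70
print(f"mean ROC-AUC = {-loss}")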
Example #5
def XGB(argsDict):
    max_depth = argsDict["max_depth"] + 1
    n_estimators = argsDict['n_estimators'] * 10 + 50
    learning_rate = argsDict["learning_rate"] * 0.02 + 0.05
    subsample = argsDict["subsample"] * 0.1 + 0.7
    min_child_weight = argsDict["min_child_weight"] + 1
    reg_alpha = argsDict["reg_alpha"]
    reg_lambda = argsDict["reg_lambda"]
    colsample_bytree = argsDict["colsample_bytree"]

    path = argsDict['path']
    data = np.load(path)
    data = data.astype('float32')
    data[data == 2] = 0.5
    X, Y = data[:, :-1], data[:, -1]
    _, rsid, _, _ = tool.splitPath(path)

    gbm = XGBClassifier(
        tree_method='gpu_hist',
        max_bin=255,
        objective="binary:logistic",
        max_depth=max_depth,  # maximum tree depth
        n_estimators=n_estimators,  # number of trees
        learning_rate=learning_rate,  # learning rate
        subsample=subsample,  # row subsampling ratio
        min_child_weight=min_child_weight,  # minimum sum of instance weight in a child
        max_delta_step=10,  # cap on each leaf's weight estimate
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        colsample_bytree=colsample_bytree,
    )
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)  # random_state requires shuffle=True
    metric = cross_val_score(gbm, X, Y, cv=kfold, scoring="roc_auc").mean()

    logger = log.init_log()
    logger.info(f"{rsid} xgb的训练得分为: {metric}")
    print(f"{rsid} xgb的训练得分为: {metric}")
    return -metric
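
The matching hyperopt search space for XGB is not included in these examples. A sketch consistent with the offsets XGB applies (max_depth + 1, n_estimators * 10 + 50, learning_rate * 0.02 + 0.05, subsample * 0.1 + 0.7, min_child_weight + 1), with the upper bounds and choice lists assumed rather than taken from the project; path and XGB come from the surrounding examples:

from functools import partial
from hyperopt import fmin, hp, tpe

xgb_space = {
    "max_depth": hp.randint("max_depth", 15),                # final depth 1-15
    "n_estimators": hp.randint("n_estimators", 5),           # final 50-90 trees
    "learning_rate": hp.uniform("learning_rate", 0.001, 2),  # final 0.05-0.09
    "subsample": hp.randint("subsample", 4),                 # final 0.7-1.0
    "min_child_weight": hp.randint("min_child_weight", 5),   # final 1-5
    "colsample_bytree": hp.choice("colsample_bytree", [0.7, 0.8, 0.9, 1.0]),
    "reg_alpha": hp.choice("reg_alpha", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]),
    "reg_lambda": hp.choice("reg_lambda", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]),
    "path": hp.choice("path", [path]),
}
best = fmin(XGB, xgb_space, algo=partial(tpe.suggest, n_startup_jobs=1), max_evals=30)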
Example #6
def pipeline(path):
    max_evals = 30
    _, name, _, _ = tool.splitPath(path)
    logger.info(f'Started training locus: {name}')
    print(f'Started training locus: {name}')

    data = np.load(path)

    try:
        X, Y = data[:, :-1], data[:, -1]
    except Exception:
        logger.info(f'Locus {name}: failed to read the file')
        print(f'Locus {name}: failed to read the file')
        return 0

    if len(np.unique(Y)) == 1:
        logger.info(f'Locus {name}: only one class label')
        print(f'Locus {name}: only one class label')
        return 0

    # skip loci where the minority class is less than 1% of the majority class
    tmp = Y.tolist()
    tmp = dict(Counter(tmp))
    if tmp[0] > tmp[1]:
        ma, mi = tmp[0], tmp[1]
    else:
        ma, mi = tmp[1], tmp[0]
    if mi / ma < 0.01:
        logger.info(f'Locus {name}: low-frequency locus')
        print(f'Locus {name}: low-frequency locus')
        return 0

    space = {
        "num_leaves": hp.randint("num_leaves", 5),  # [0, 5); final value 25-29
        "max_depth": hp.choice("max_depth", [-1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]),
        "learning_rate": hp.uniform("learning_rate", 0.001, 2),  # uniform on [0.001, 2]
        "n_estimators": hp.randint("n_estimators", 5),  # [0, 5); final value 50-90
        "min_child_weight": hp.uniform("min_child_weight", 0.001, 0.01),  # uniform on [0.001, 0.01]
        "min_child_samples": hp.randint("min_child_samples", 10),  # [0, 10); final value 18-27
        "subsample": hp.randint("subsample", 4),  # [0, 4); final value 0.7-1.0
        "colsample_bytree": hp.choice("colsample_bytree", [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]),
        "reg_alpha": hp.choice("reg_alpha", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]),
        "reg_lambda": hp.choice("reg_lambda", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]),
        "path": hp.choice('path', [path])
    }

    start = time.time()
    algo = partial(tpe.suggest, n_startup_jobs=1)  # optimization algorithm (TPE)
    best = fmin(LGB, space, algo=algo,
                max_evals=max_evals)  # max_evals is the maximum number of models to train; larger values make the optimum easier to find

    best = RECOVERLGB(best)  # map fmin's raw samples/choice indices back to real hyperparameters
    TRAINLGB(X, Y, best, name, save_path + name + '.lgb', logger)
    end = time.time()
    times = end - start
    logger.info(f'Locus {name} took {times} seconds')
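
RECOVERLGB and TRAINLGB are not shown. fmin returns hp.choice parameters as indices into their option lists and the other parameters as raw samples, so a recovery helper consistent with the rescaling in Example #4 might look roughly like this (a sketch, not the project's actual implementation):

def RECOVERLGB(best):
    # Map fmin's result back to real LGBMClassifier hyperparameters.
    max_depth_opts = [-1, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
    colsample_opts = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]
    reg_alpha_opts = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]
    reg_lambda_opts = [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]
    return {
        'num_leaves': best['num_leaves'] + 25,
        'max_depth': max_depth_opts[best['max_depth']],  # hp.choice returns an index
        'learning_rate': best['learning_rate'] * 0.02 + 0.05,
        'n_estimators': best['n_estimators'] * 10 + 50,
        'min_child_weight': best['min_child_weight'],
        'min_child_samples': best['min_child_samples'] + 18,
        'subsample': best['subsample'] * 0.1 + 0.7,
        'colsample_bytree': colsample_opts[best['colsample_bytree']],
        'reg_alpha': reg_alpha_opts[best['reg_alpha']],
        'reg_lambda': reg_lambda_opts[best['reg_lambda']],
    }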