def lowPrice_data_main(params):
    write_idList(params, cursor)
    utils.delete_before4_localData(params["localFileName_lowPrice_idList"],
                                   params)

    if os.path.exists(params["localFileName_org_lowPrice_data"]):
        os.remove(params["localFileName_org_lowPrice_data"])
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    with open(params["localFileName_org_lowPrice_data"], 'a') as f_write:
        f_write.write(",".join(columns))
        f_write.write("\n")
        f_write.seek(0, 2)  # seek to end of file
        p = Pool(10)
        counter = 0
        with open(params["localFileName_lowPrice_idList"], 'r') as f_read:
            for line in f_read:
                counter += 1
                L = line.strip().split(',')
                p.apply_async(lowPrice_data, args=(
                    params,
                    L,
                ))
        p.close()
        p.join()
        logger.info("=====\"{}\" finished======".format(
            params["localFileName_org_lowPrice_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_lowPrice_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_data"],
                         params["sparkDirName_org_lowPrice_data"], params)
def infoBase_data(params):
    columns = ["infoBase_id", "departTime", "arriveTime", "isShare"]
    with open(params["localFileName_org_infoBase_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor4.find(
            {'date': {
                '$gte': datetime.today().strftime("%Y-%m-%d")
            }}):
            infoBase_id = sample.get("_id")
            departtime = sample.get('origindeparttime')
            arrivetime = sample.get('originarrivetime')
            isShare = sample.get('isshare')
            try:
                content = ','.join(
                    [infoBase_id, departtime, arrivetime,
                     str(isShare)])
                f.write(content + '\n')
            except:
                continue
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_infoBase_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_infoBase_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_infoBase_data"],
                         params["sparkDirName_org_infoBase_data"], params)
def seatleft_data(params):
    columns = ['queryDatetime', 'seatLeft', 'seatLeft_id']
    with open(params["localFileName_org_seatLeft_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)  # seek to end of file
        for sample in cursor3.find({}):
            seatLeft_id = sample.get('_id')
            df = pd.DataFrame.from_dict(
                sample.get('fc'),
                orient='index').reset_index().rename(columns={
                    'index': 'queryDatetime',
                    0: 'seatLeft'
                })
            df['seatLeft_id'] = seatLeft_id
            df.to_csv(params["localFileName_org_seatLeft_data"],
                      header=False,
                      index=False,
                      mode='a')
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_seatLeft_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_seatLeft_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_seatLeft_data"],
                         params["sparkDirName_org_seatLeft_data"], params)
def orgPrice_data(params):
    columns = ["orgPrice_id", "fc", "orgPrice"]
    with open(params["localFileName_org_orgPrice_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor1.find(
            {'date': {
                '$gte': datetime.today().strftime("%Y-%m-%d")
            }}):
            orgPrice_id = sample.get('_id')
            del sample['_id']
            del sample["date"]
            del sample["ut"]
            del sample["src"]
            for key, value in sample.items():
                try:
                    orgPrice = value.get('Y').get('price')
                    content = ','.join([orgPrice_id, key, orgPrice])
                    f.write(content + '\n')
                except:
                    continue
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_orgPrice_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_orgPrice_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_orgPrice_data"],
                         params["sparkDirName_org_orgPrice_data"], params)
def write_to_HDFS(df_all, params):

    # Rows whose departure date is after generateDate are online data to be predicted; rows
    # before generateDate are training data; rows equal to generateDate are not kept.
    # Randomly sample a fixed number of rows (6e7 below) from the full data set as the training set.
    df_train_DNN = df_all.filter(df_all.departDate < params["generateDate_str2"])\
                     .drop(*list(set(params['dropFeatures'])-set(params['baseColumns'])))\
                     .sample(False, float("%.4f" % (6e7/df_all.count())))\
                     .orderBy(rand())
    # df_train_DNN.cache()
    df_train_DNN.write.format('parquet').save(params["sparkHost"] +
                                              params["sparkDirName_trainData"],
                                              mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainData"]))
    utils.delete_before4_sparkData(params["sparkDirName_trainData"], params)

    # df_trainSample_DNN = df_train_DNN.sample(False, float("%.4f" % (2e5/df_train_DNN.count())))
    # df_trainSample_DNN.write.format('parquet').save(params["sparkHost"] + params["sparkDirName_trainSampleData"], mode='overwrite')
    # logger.info("====\"{}\" write to HDFS finished ====".format(params["sparkDirName_trainSampleData"]))
    # utils.delete_before4_sparkData(params["sparkDirName_trainSampleData"], params)

    df_online_DNN = df_all.filter(df_all.departDate > params["generateDate_str2"])\
                     .drop(*list(set(params['dropFeatures']) - set(params['baseColumns'])))
    df_online_DNN.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_onlineData"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_onlineData"]))
    utils.delete_before4_sparkData(params["sparkDirName_onlineData"], params)
def org_ticketHistory_data(params):
    with open(params["localFileName_org_ticketHistory_data"], 'w') as f:
        columns = ['queryDatetime', 'hasTicket', 'id']
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
        for sample in cursor_ticket.find({}):
            id = sample.get('_id')
            del sample["_id"]
            try:
                del sample["ut"]
                del sample["noticket"]
                del sample["hasticket"]
            except:
                pass
            df = pd.DataFrame.from_dict(
                sample, orient='index').reset_index().rename(columns={
                    'index': 'queryDatetime',
                    0: 'hasTicket'
                })
            df['id'] = id
            df.to_csv(params["localFileName_org_ticketHistory_data"],
                      mode='a',
                      header=False,
                      index=False)
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_ticketHistory_data"].split('/')[-1]))
    utils.delete_before2_localData(
        params["localFileName_org_ticketHistory_data"], params)
    utils.upload_to_hdfs(params["localFileName_org_ticketHistory_data"],
                         params["sparkDirName_org_ticketHistory_data"], params)
def delete_before2_localData(fileName, params):
    before2_dateStr1 = datetime.strftime(
        params["generateDate"] - timedelta(days=2), "%Y%m%d")
    pattern = re.compile(r'\d{8}')
    before2_fileName = re.sub(pattern, before2_dateStr1, fileName)
    if os.path.exists(before2_fileName):
        os.remove(before2_fileName)
        logger.info("====\"{}\" delete finished ====".format(before2_fileName))
def upload_to_hdfs(localFileName, sparkDirName, params):
    clientHdfs = client.InsecureClient(params["hdfsHost"], user="******")
    if sparkDirName.split('/')[-1] in clientHdfs.list(
            os.path.dirname(sparkDirName)):
        clientHdfs.delete(sparkDirName, recursive=True)
    clientHdfs.upload(sparkDirName, localFileName)
    logger.info("====\"{}\" upload to HDFS finished====".format(
        localFileName.split('/')[-1]))
    delete_before2_sparkData(sparkDirName, params)
def delete_before2_sparkData(fileName, params):
    clientHdfs = client.InsecureClient(params["hdfsHost"], user="******")
    before2_dateStr1 = datetime.strftime(
        params["generateDate"] - timedelta(days=2), "%Y%m%d")
    pattern = re.compile(r'\d{8}')
    before2_fileName = re.sub(pattern, before2_dateStr1, fileName)
    if before2_fileName.split('/')[-1] in clientHdfs.list(os.path.dirname(fileName)):
        clientHdfs.delete(before2_fileName, recursive=True)
        logger.info("====\"{}\" delete finished ====".format(before2_fileName))
def cronTask():
    logger.info("启动定时任务,间隔 {} 分钟".format(interval))

    # 创建版本库对象
    repo = Repo('sys.argv[1]')
    repo.commit('-m', '自动上传至GitHub')
    # 获取远程仓库
    remote = repo.remote()
    # 推送本地修改到远程仓库
    remote.push()
    def _init_write_to_mongo(self):
        data = json.loads(self.df.to_json(orient='records'))
        requests = []
        for sample in data:
            requests.append(
                UpdateOne({'_id': sample.get('_id')}, {'$set': sample},
                          upsert=True))
        self.cursor.bulk_write(requests, ordered=False)
        logger.info("====update mongoDB finished====")
        self.cursor.delete_many(
            {'queryDate': {'$lt': self.params["yesterday_str2"]}})
def run_load_model_predict(params, dfm_params):
    clientHdfs = client.InsecureClient(params['hdfsHost'], user="******")
    fileNames = clientHdfs.list(params['sparkDirName_onlineData'])
    fileNames.remove('_SUCCESS')
    featureDict, _, _ = get_featureDict_info(params)
    dataParser = DataParser(params, featureDict)
    df_online = load_data(clientHdfs, params)
    df_online_index, df_online_value = dataParser.data_parser(df_online,
                                                              has_label=False)
    deep_model_predict = DeepFM_model_predict(params, dfm_params)
    deep_model_predict.write_result(params, df_online, df_online_index,
                                    df_online_value)
    deep_model_predict.sess.close()
    logger.info("====\"{}\" write finished====".format(
        params["localFileName_deepFM_result"].split("/")[-1]))
def write_idList(params, cursor):
    stringIO_temp = StringIO()
    count = 0
    with open(params["localFileName_lowPrice_idList"],
              'w') as f:  # 遍历mongoDB中的ID,写入文件
        for sample in cursor.find():
            count += 1
            stringIO_temp.write(sample.get('_id') + ',')
            if count % 10000 == 0:
                content = stringIO_temp.getvalue().strip(',') + '\n'
                f.write(content)
                stringIO_temp = StringIO()
        content = stringIO_temp.getvalue().strip(',') + '\n'
        f.write(content)
        logger.info("====\"{}\" finished====".format(
            params["localFileName_lowPrice_idList"].split('/')[-1]))
    def early_stop(self, df_index, df_value, y_label, epoch, counter, params):
        valScore, _ = self.evaluate(df_index, df_value, y_label)
        if self.best_valScore < valScore:
            self.best_valScore = valScore
            self.lessScores_container = []
            self.best_valScore_epoch = epoch + 1
            self.best_valScore_counter = counter
            self.saver.save(self.sess,
                            "{}-{}".format(params['localDirName_deepFM_model'],
                                           params["generateDate_str1"]),
                            global_step=0)
        else:
            self.lessScores_container.append(valScore)
            if len(self.lessScores_container) > 10:
                logger.info(
                    "====deepFM train Model best_counter is {}-{}".format(
                        self.best_valScore_epoch, self.best_valScore_counter))
                self.earlyStop_info = True
        return self.earlyStop_info
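# Illustrative call site only (the real training loop is not shown in this excerpt): the
# method above would typically be polled once per validation step, e.g.
#
#     if dfm.early_stop(df_val_index, df_val_value, y_val, epoch, counter, params):
#         break  # stop training once the validation score has failed to improve 10+ times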
def org_trainStation_data(params):
    with open(params["localFileName_org_trainStation_data"], 'w') as f:
        columns = [
            '_id', 'jianpin', 'code', 'quanpin', 'name', 'citycn', 'latitude',
            'longitude', 'citycode', 'realdistancesmap', 'site', 'distancesmap'
        ]
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
        for sample in cursor_station.find({}):
            content = ','.join(map(str, sample.values()))
            f.write(content + '\n')
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_trainStation_data"].split('/')[-1]))
        utils.delete_before2_localData(
            params["localFileName_org_trainStation_data"], params)
        utils.upload_to_hdfs(params["localFileName_org_trainStation_data"],
                             params["sparkDirName_org_trainStation_data"],
                             params)
def write_DCN_featureDict(df_all, params):
    df = df_all.drop(*params["dropFeatures"]).drop(params['label'])
    featureDict = {}
    tc = 0
    for colName in df.columns:
        if colName in params["numericCols"]:
            continue
        else:  # colName in categoryCols
            uniqueFeature = df.select(
                colName).distinct().toPandas()[colName].astype('float').values
            featureDict[colName] = dict(
                zip(uniqueFeature, range(tc,
                                         len(uniqueFeature) + tc)))
            tc = tc + len(uniqueFeature)
    with open(params["featureDict_fileName"], 'wb') as f:
        pickle.dump(featureDict, f)
        logger.info("====\"{}\" finished ====".format(
            params["featureDict_fileName"].split('/')[-1]))
    utils.delete_before4_localData(params["featureDict_fileName"], params)
def lowPrice_online_data(params):
    tomorrow_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=1), '%Y-%m-%d')
    after30_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=30), '%Y-%m-%d')
    monthDay_list = pd.date_range(
        start=tomorrow_str2, end=after30_str2,
        freq='d').map(lambda x: datetime.strftime(x, '%m-%d'))
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    with open(params["localFileName_org_lowPrice_onlineData"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for monthDay in monthDay_list:
            # For online data, filter by the _id condition and keep only the price at the
            # latest query date (one record per id); any further checks are done in Spark.
            # If the latest query date falls within the last 7 days (i.e. the id has a recent
            # price record), the latest query date is treated as yesterday (the model can only
            # use information up to yesterday to predict the price trend from today onward).
            for sample in cursor.find(
                {'_id': {
                    "$regex": r'.*{}$'.format(monthDay)
                }}):
                lowPrice_id = sample.get('_id')
                del sample['_id']
                org = sample.get('dairport')
                del sample['dairport']
                dst = sample.get('aairport')
                del sample['aairport']
                # df = pd.DataFrame.from_dict(sample, orient='index').reset_index().rename(
                #     columns={'index': 'queryDate', 0: 'price'})
                queryDate = max(sample.keys())
                price = sample.get(queryDate)
                # historyLowPrice_fn_domestic contains malformed records, e.g. _id="3U3100_null_null_09-24"
                try:
                    content = ','.join(
                        [queryDate, price, lowPrice_id, org, dst])
                    f.write(content + '\n')
                except:
                    continue
        logger.info("=====\"{}\" finished======".format(
            params["localFileName_org_lowPrice_onlineData"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_lowPrice_onlineData"], params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_onlineData"],
                         params["sparkDirName_org_lowPrice_onlineData"],
                         params)
def globalAirport_data(params):
    columns = ["Airport_code", "latitude", "longitude"]
    with open(params["localFileName_org_Airport_data"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        for sample in cursor2.find({}):
            Airport_code = sample.get("_id")
            latitude = sample.get("latitude")
            longitude = sample.get("longitude")
            try:
                content = ','.join([Airport_code, latitude, longitude])
                f.write(content + '\n')
            except:
                continue
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_Airport_data"].split('/')[-1]))
    utils.delete_before4_localData(params["localFileName_org_Airport_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_Airport_data"],
                         params["sparkDirName_org_Airport_data"], params)
def initialize():
    """
    初始化函数,在项目每次启动都杀掉项目中已有的进程,然后重新根据任务状态重新启动。重启之后之前的进程会退出销毁。
    :return:
    """
    logger.info("start init function")
    task_list = list(mongodb.TASK.find())
    if len(task_list) == 0:
        return 'no tasks need init'
    for task in task_list:
        pid = task.get('pid')
        if pid:
            try:
                os.kill(pid, signal.SIGKILL)
                logger.info('initialize function kill pid:{} success'.format(pid))
            except Exception as e:
                logger.info('initialize function kill pid:{} failed'.format(pid))
                logger.info(e)
            mongodb.TASK.find_one_and_update({'_id': task['_id']}, {'$set': {'pid': ''}})
    logger.info("finish init")
    return 'initialize success'
def generate_test_data(params):
    df_train_DNN = spark.read.format('parquet').load(
        params["sparkHost"] + params["sparkDirName_trainData"])

    # valDate is assumed to be a module-level validation-date string (not shown in this excerpt)
    df_train_DNN_test = df_train_DNN.filter(col('queryDate') < valDate)
    df_train_DNN_test.repartition(200).write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_trainData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainData_test"]))

    df_trainSample_DNN_test = df_train_DNN_test.sample(
        False, float("%.4f" % (2e5 / df_train_DNN_test.count())))
    df_trainSample_DNN_test.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_trainSampleData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_trainSampleData_test"]))

    df_val_DNN_test = df_train_DNN.filter(col('queryDate') == valDate)
    df_val_DNN_test.write.format('parquet').save(
        params["sparkHost"] + params["sparkDirName_valData_test"],
        mode='overwrite')
    logger.info("====\"{}\" write to HDFS finished ====".format(
        params["sparkDirName_valData_test"]))
def seatleft_data_add(params):
    tomorrow_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=1), '%Y-%m-%d')
    today_monthDay = datetime.strftime(params["generateDate"], '%m-%d')
    yesterday_str2 = (params["generateDate"] -
                      timedelta(days=1)).strftime('%Y-%m-%d')
    after30_str2 = datetime.strftime(
        params["generateDate"] + timedelta(days=30), '%Y-%m-%d')
    monthDay_list = pd.date_range(
        start=yesterday_str2, end=after30_str2,
        freq='d').map(lambda x: datetime.strftime(x, '%m-%d')).to_list()
    monthDay_list.remove(today_monthDay)
    columns = ['queryDatetime', 'seatLeft', 'seatLeft_id']
    with open(params["localFileName_org_seatLeft_data_add"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)  # seek to end of file
        for monthDay in monthDay_list:
            for sample in cursor3.find(
                {'_id': {
                    "$regex": r'.*{}$'.format(monthDay)
                }}):
                seatLeft_id = sample.get('_id')
                df = pd.DataFrame.from_dict(
                    sample.get('fc'),
                    orient='index').reset_index().rename(columns={
                        'index': 'queryDatetime',
                        0: 'seatLeft'
                    })
                df['seatLeft_id'] = seatLeft_id
                df.to_csv(params["localFileName_org_seatLeft_data_add"],
                          header=False,
                          index=False,
                          mode='a')
        logger.info("====\"{}\" finished====".format(
            params["localFileName_org_seatLeft_data_add"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_seatLeft_data_add"], params)
    utils.upload_to_hdfs(params["localFileName_org_seatLeft_data_add"],
                         params["sparkDirName_org_seatLeft_data_add"], params)
def timer(task_id):
    """
    mongo是线程安全的,然后进程传递会有警告.查询说是可以设置参数connect=Flase,但是好像没有啥卵用。
    定时函数。
    1.告警轮询任务:根据规则的需要能够根据指定分钟间隔执行某种逻辑功能。
    2.周期数据统计任务:根据设置配置的crontab规则,执行需要查询的规则。
        1.type=1,默认执行原本的轮训监控任务
    :return:
    """
    pid = os.getpid()
    logger.info('task run pid:{}'.format(pid))
    # Record the pid running this task so the control process can skip it; the scheduled
    # job below then polls periodically inside this process.
    db.tasks.find_one_and_update({'_id': task_id}, {'$set': {'pid': pid}})
    task_config = db.tasks.find_one({'_id': task_id})  # configuration of the polling task run in this process (task_id can be converted to ObjectId if needed)
    type = task_config.get('type')
    # type == 1: periodic alert polling task
    if type == 1:
        timeCell = task_config.get('timeCell')  # polling interval in minutes
        # logger.info('task timeCell:{}'.format(timeCell))
        sched = BlockingScheduler()
        sched.add_job(logAlert_update_info, 'interval', seconds=timeCell*60, args=(task_id,))
        logger.info('logAlert_update_info function will start,task_id is {}, timeCell is {}min'.format(task_id, timeCell))
        sched.start()
    # type == 2: periodic statistics task; run the statistics on the configured cron
    # schedule, aggregate all query parameters configured for the project and send the
    # summary directly by email (no SMS option).
    elif type == 2:
        interval = task_config.get('interval')  # period length: day, week or month; decides the query's start and end time
        crontab = task_config.get('crontab')
        logger.info(crontab)  # crontab rule that decides the statistics schedule
        sched = BlockingScheduler()
        sched.add_job(count_info_interval, CronTrigger.from_crontab(crontab), args=(task_id,))
        logger.info('count_info_interval will start and interval = {},task_id is {}'.format(interval, task_id))
        sched.start()
    # type == 3: count log errors over a given time span; not a polling task, it runs once and exits
    elif type == 3:
        logger.info('count_info_once will start, task_id is {}'.format(task_id))
        count_once_info(task_id)
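# Illustrative only: the `crontab` field consumed by CronTrigger.from_crontab() above is a
# standard 5-field cron expression, e.g. run every day at 08:00:
#
#     CronTrigger.from_crontab('0 8 * * *')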
def lowPrice_train_data_add(params):
    columns = ["queryDate", 'price', 'id', 'org', 'dst']
    yesterday_monthDay_str = (params["generateDate"] -
                              timedelta(days=1)).strftime('%m-%d')
    with open(params["localFileName_org_lowPrice_data_add"], 'w') as f:
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)  # seek to end of file
        for sample in cursor.find(
            {'_id': {
                "$regex": r'.*{}$'.format(yesterday_monthDay_str)
            }}):
            lowPrice_id = sample.get('_id')
            del sample['_id']
            org = sample.get('dairport')
            del sample['dairport']
            dst = sample.get('aairport')
            del sample['aairport']
            df = pd.DataFrame.from_dict(
                sample, orient='index').reset_index().rename(columns={
                    'index': 'queryDate',
                    0: 'price'
                })
            df['id'] = lowPrice_id
            df['org'] = org
            df['dst'] = dst
            df.to_csv(params["localFileName_org_lowPrice_data_add"],
                      header=False,
                      index=False,
                      mode='a')
        logger.info("=====\"{}\" finished======".format(
            params["localFileName_org_lowPrice_data_add"].split('/')[-1]))
    utils.delete_before4_localData(
        params["localFileName_org_lowPrice_data_add"], params)
    utils.upload_to_hdfs(params["localFileName_org_lowPrice_data_add"],
                         params["sparkDirName_org_lowPrice_data_add"], params)
def org_TrainPass_data(params):
    with open(params["localFileName_org_TrainPass_data"], 'w') as f:
        columns = [
            'tn', 'arriveTime', 'stationCode', 'departTime', 'stationName',
            'orderNum'
        ]
        f.write(','.join(columns))
        f.write('\n')
        f.seek(0, 2)
        for sample in cursor_pass.find({}):
            tn = sample.get("_id")
            List = sample.get("array")
            counter = 0
            for sub in List:
                counter += 1
                content = ','.join([tn] + list(map(str, sub.values())) +
                                   [str(counter)])
                f.write(content + '\n')
    logger.info("====\"{}\" finished====".format(
        params["localFileName_org_TrainPass_data"].split('/')[-1]))
    utils.delete_before2_localData(params["localFileName_org_TrainPass_data"],
                                   params)
    utils.upload_to_hdfs(params["localFileName_org_TrainPass_data"],
                         params["localFileName_org_TrainPass_data"], params)
def startCronTask(task, **interval_config):
    # Global scheduler used to start and stop the scheduled task
    global scheduler
    scheduler = BlockingScheduler()
    scheduler.add_listener(CronTask_listener,
                           EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
    scheduler._logger = logger
    logger.info(
        '==================================== new log segment =============================================='
    )
    scheduler.add_job(func=task,
                      trigger='interval',
                      **interval_config,
                      id='push_to_github')
    logger.info('Current scheduled jobs: %s', scheduler.get_jobs())
    logger.info('Scheduler state: %s', scheduler.state)

    scheduler.start()
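# Illustrative usage only (the 30-minute value is an assumption, not from the source):
# interval_config is forwarded to APScheduler's interval trigger, so the GitHub push task
# defined above could be scheduled every 30 minutes with:
#
#     startCronTask(cronTask, minutes=30)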
    def judge(self, receiveRequest, userName, pwd):
        try:
            nowUser = MyBaseModel.returnList(
                User.select(User.userName, User.userPwd).where(User.userName == userName))
            if len(nowUser) > 0:
                if nowUser[0]['userPwd'] == pwd:

                    my_session = Session(receiveRequest)
                    session_id = my_session.getSessionId()
                    my_session['name'] = userName
                    logger.info('User %s logged in' % userName)
                    return {"status": 1, "session_id": session_id, "username": userName}
                else:
                    logger.info('User %s failed to log in: wrong password' % userName)
                    return {"status": 0, "errorInfo": "Incorrect password"}

            else:
                logger.info('User %s failed to log in: user does not exist' % userName)
                return {"status": 0, "errorInfo": "Username does not exist"}
        except:
            raise
    def change(self, user_name, user_oldpwd, user_newpwd1, user_newpwd2):
        try:
            with db.execution_context():
                nowuser = new_users.select().where(
                    new_users.username == user_name)
                if len(nowuser) > 0:
                    if nowuser[0].userpass == user_oldpwd:
                        #user_roles=getNowUserRole(user_name)
                        if user_newpwd1 == user_newpwd2:
                            nowuser[0].userpass = user_newpwd1
                            nowuser[0].save()
                            logger.info('User %s changed password successfully' % user_name)
                            return json.dumps({"status": 1, "info": "Password changed successfully"},
                                              ensure_ascii=False)
                        else:
                            return json.dumps({"status": 0, "errorInfo": "The two new passwords do not match"},
                                              ensure_ascii=False)
                    else:
                        logger.info('User %s failed to change password: wrong original password' % user_name)
                        return json.dumps({"status": 0, "errorInfo": "Original password is incorrect"},
                                          ensure_ascii=False)

                else:
                    logger.info('User %s failed to change password: user does not exist' % user_name)
                    return json.dumps({"status": 0, "errorInfo": "Change failed: user does not exist"},
                                      ensure_ascii=False)
        except:
            raise
    def judge(self, response_self, user_name, user_pwd):
        try:
            nowuser = MyBaseModel.returnList(
                new_users.select().where(new_users.username == user_name))
            if len(nowuser) > 0:
                if nowuser[0]['userpass'] == user_pwd:
                    user_roles = getUserRole(user_name)
                    #user_roles=getNowUserRole(user_name)
                    my_session = Session(response_self)
                    session_id = my_session.get_session_id()
                    my_session['name'] = user_name
                    logger.info('User %s logged in' % user_name)
                    return json.dumps({"status": 1, "data": user_roles, "session_id": session_id},
                                      ensure_ascii=False)
                else:
                    logger.info('User %s failed to log in: wrong password' % user_name)
                    return json.dumps({"status": 0, "errorInfo": "Incorrect password"},
                                      ensure_ascii=False)

            else:
                logger.info('User %s failed to log in: user does not exist' % user_name)
                return json.dumps({"status": 0, "errorInfo": "Username does not exist"},
                                  ensure_ascii=False)
        except:
            raise
    # (Excerpt from a deepFM training driver; clientHdfs, featureDict, params, dfm_params
    #  and train_model are presumably defined earlier in the original function.)
    dataParser = DataParser(params, featureDict)
    gc.collect()
    fileNames = clientHdfs.list(params['sparkDirName_trainData'])
    # fileNames = os.listdir(params['sparkDirName_trainData'])
    fileNames.remove('_SUCCESS')
    fileNames_num = len(fileNames)

    dfm = deepFM.DeepFM(**dfm_params)
    train_scores = []
    val_scores = []
    train_losses = []
    val_losses = []
    for epoch in range(params['epoches']):
        counter = 0
        for fileName in fileNames:
            counter += 1
            train_model(fileName, params, dataParser)
            # logger.info("====Train {}-{} finished====".format(epoch, counter))
            if counter % 100 == 0:
                dfm.saver.save(dfm.sess,
                               "{}-{}-{}".format(
                                   params['localDirName_deepFM_model'],
                                   params['generateDate_str1'], epoch + 1),
                               global_step=counter)
    dfm.sess.close()
    logger.info("====deepFM Train Model finished====")

    load_model_to_predict.run_load_model_predict(params, dfm_params)

    write_result_to_mongo.Result_To_Mongo(params)
def count_info_interval(task_id):
    """
    针对设置的参数定时统计数据,然后可以系统内可以控制添加和移除查询参数,也可以关闭项目的统计任务,不需要进行短信通知,只邮件发送。
    :param task_id:
    :return:
    """
    logger.info('count info interval task id is {}'.format(task_id))
    task_config = db.tasks.find_one({'_id': ObjectId(task_id)})
    name = task_config.get('name')
    app = task_config.get('app')
    interval = task_config.get('interval')
    if interval == 1:  # start and end of the previous day
        start_time, end_time = get_yesterday_timestamp()
    elif interval == 7:  # start and end of the previous week
        start_time, end_time = get_last_week_times()
    else:  # start and end of the previous month
        start_time, end_time = get_last_month_times()
    URL = 'http://test.yuxisoft.cn:19200/logstash-{}-*/doc/_search'.format(app)
    person = task_config.get('person')
    params = task_config.get('params')
    # way = task_config.get('way')
    _range = {"range": {
        "@timestamp": {
            "gt": "{}".format(start_time),
            "lt": "{}".format(end_time)
        }
    }
    }
    # 统计任务,汇总同一个项目下的不同查询规则的参数,汇总所有查询结果,然后统一一个邮件发送。
    content = ''
    for params in params:
        filters = convert_params(params)
        filters.insert(0, _range)
        query_params = {
            "size": 1000,
            "sort": {
                "@timestamp": "desc"
            },
            "query": {
                "bool": {
                    "filter": filters
                }
            }
        }

        logging.info('-------------')
        logging.info('query params:')
        logging.info('{}'.format(query_params))
        logging.info('-------------')

        headers = {
            'Content-Type': 'application/json'
        }
        query_params = json.dumps(query_params)
        result = requests.post(URL, headers=headers, data=query_params)
        resp_str = result.text
        resp_conn = json.loads(resp_str)
        logging.info('-------------')
        logging.info('query from elk result:')
        logging.info('{}'.format(resp_conn))
        logging.info('-------------')
        total = resp_conn['hits']['total']
        content = content + "查询规则:{}, 出现次数:{} 次.\n".format(params, total)
    start_time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time/1000))
    end_time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time/1000))
    logger.info('project: {} count info interval will send email'.format(app))
    result = send_email3(person, name, content, start_time_str, end_time_str, interval)
    logger.info('count info interval send email result is {}'.format(result))
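# convert_params() is not part of this excerpt. A minimal sketch of what it might return,
# assuming each query rule is a flat {field: value} mapping that becomes Elasticsearch
# term filters (illustrative, not the original implementation):
def convert_params_sketch(rule):
    return [{"term": {field: value}} for field, value in rule.items()]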