(status, output) = commands.getstatusoutput(shell_command) if status == 0: print time.strftime("%H:%M:%S", time.localtime( )), "==========================================>", log_str, "ok" log.info(log_str + "ok") return True else: print time.strftime("%H:%M:%S", time.localtime( )), "==========================================>", log_str, "fail" log.info(log_str + "fail") return False if __name__ == '__main__': date_day = MyTime.get_date(1) if len(sys.argv) == 2: date_day = sys.argv[1] print time.strftime("%H:%M:%S", time.localtime( )), "==========================================>un tar begin" if un_tar(date_day): print time.strftime("%H:%M:%S", time.localtime( )), "==========================================>load to mongodb begin" if tar_to_mongodb(date_day): print time.strftime( "%H:%M:%S", time.localtime() ), "==========================================>load to mongodb end" if mongodb_to_mysql(date_day): time.sleep(10) print time.strftime(
log.debug(logString) #MyAlarm.send_mail_sms(logString) #删除分区 try: if table not in self.reserve: dropData = 'use to8to_rawdata;alter table ' + table + ' drop partition (dt=' + self.last7DateFormat + ')' # self.last7DateFormat = str(self.last7Date).replace('-', '') 删除7天前的分区 hive.execute(dropData) #删除掉当天txt文件 #os.remove(file) except Exception, ex: pass try: rmCmd = 'rm -rf ' + self.delPath # 删除当天的分区 self.doPath = '/data1/bi/platform/tar/2016-12-21/Mysql/' os.system(rmCmd) except Exception, ex: pass
# Script entry point.
# Builds a toHive worker for one date and runs its three steps in order.
# The date defaults to MyTime.get_date(1) and is replaced by the command-line
# argument when exactly one argument is supplied.
if __name__ == '__main__':
    date = MyTime.get_date(1)  # per the original comment: defaults to yesterday's date
    if len(sys.argv) == 2:
        date = sys.argv[1]

    hive = toHive(date)
    # NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
    # source; both follow-up steps are assumed to run only when tarFile() returns
    # a truthy value - confirm against the original script.
    if hive.tarFile():  # per the original comment: extract the tar.gz file
        hive.cutTxt()  # per the original comment: replace private information with null
        hive.txtToHive()  # per the original comment: load the rewritten txt files into hive
    # Disabled follow-up jobs, kept exactly as they appeared in the original:
    #os.system('python /data1/bi/platform/scripts/BI/BISub/bi_yewu_caiwu_argv.py ' + date)
    #os.system('python /data1/bi/platform/scripts/BI/BISub/bi_zxgs_yunyingjibie.py ' + date)
except Exception, ex: print str(ex) pass print MyTime.get_local_time( ), '-------------- tar click stream file begin' # from cube import MyTime log.info( 'tar click stream file begin' ) # 写入日志 log = MyLog.MyLog(path='/data1/bi/platform/scripts/BI/ClickStream/log/', name='ClickStream', type='to8to', level='DEBUG') if check_file(): ## 3.4 检查json日志文件是否生成 print MyTime.get_local_time( ), '-------------- tar click stream file success, then process work' log.info('tar click stream file success') log.info('click stream to8to process work') get_file_size(FILE_JSON_NAME) ## 3.5 获得文件大小,这个有点多余 click_stream() ## 3.6 开始清洗 print MyTime.get_local_time(), '-------------- process success' log.info('click stream to8to process work success') if __name__ == '__main__': date = MyTime.get_date(1) #1 、直接获取计算日期的当天时间 :'2016-11-29',代表获取前一天的数据 if len(sys.argv) == 2: date = sys.argv[1] #2 、假如脚本后面带 时间参数,就用参数(猜测这个是用来便于回滚脚本设置的) main(date) #3 、清洗数据 print MyTime.get_local_time( ), '-------------- load all file into hive begin' #4、 MyTime.get_local_time() 用来获取当前时间:'2016-11-30 19:41:16' #load_file(date) #5、 导数据 #clickstream_sjb_shell = 'python /data1/bi/platform/scripts/BI/ClickStream/ClickStreamSJB.py ' + date #os.system(clickstream_sjb_shell) #del_file() #解析后可以删除源文件
file_path = os.path.join(root, file) if 'sem_sm_keyword' in file: hive.load_file('to8to_rawdata', 'sem_sm_keyword', file_path, partition_dict) elif 'sem_sm_diyu' in file: hive.load_file('to8to_rawdata', 'sem_sm_diyu', file_path, partition_dict) else: pass try: shutil.rmtree(data_path) except Exception, ex: print ex return True else: pass leave_time += 300 if leave_time > wait_time: return False time.sleep(300) if __name__ == '__main__': date = MyTime.get_date(1) if len(sys.argv) == 2: date = sys.argv[1] if not load_all(date): MyAlarm.send_mail_sms('load sem shenma keyword to hive status:fail')
for d in dirs: print os.path.join(root, d) for f in files: file = os.path.join(root, f) shell = '/usr/bin/dos2unix ' + file os.popen(shell) hive = MyHiveBin.HiveBin() hive.load_file_single_overwrite( 'to8to_rawdata', file) log.info('Mysql actual kefu yuyue to hive status:ok') break else: log.info('Mysql actual tar file not exists') now_time_stamp = MyTime.datetime_timestamp(MyTime.get_local_time()) if now_time_stamp > latest_time_stamp: log.critical('not find ' + tar_src) MyAlarm.send_mail_sms('Get Mysql actual tar file status:fail!') return False time.sleep(time_rate) return True if __name__ == '__main__': date = MyTime.get_date(1) if len(sys.argv) == 2: date = sys.argv[1] today = MyTime.get_date(0) if load_file(date, today): os.system(next_script)