def Del_sql(fname, cursor=cursor, mconn=mconn):
    """Delete file records and the content hashes they leave orphaned.

    Removes the ``fmate_code`` row of every path in *fname*, then removes the
    ``fline_code`` and ``hash_code`` rows of each hash that no ``fmate_code``
    row references any more.  On success the module-level ``HSdel`` is set to
    the number of orphaned hashes removed and the deleted paths are appended
    to the module-level ``FileDEL``.

    Args:
        fname: Iterable of full file paths (``namepath`` values) to delete.
        cursor: Database cursor used to run the statements.
        mconn: Connection that is committed on success and rolled back on
            failure.

    Returns:
        None.  Failures are logged and swallowed (best-effort contract).
    """
    global FileDEL
    global HSdel
    removed = []
    try:
        cursor.execute('''use fortest;''')
        cursor.execute('''set sql_safe_updates = 0''')
        try:
            # NOTE: the original also executed an empty statement here; an
            # empty query is rejected by the server, so it has been dropped.
            for fi in fname:
                cursor.execute(
                    '''delete from fmate_code where namepath = %s''', [fi])
                removed.append(fi)
            cursor.execute(
                '''select hashid from hash_code where hashid not in (select hashid from fmate_code)'''
            )
            # Rows are kept as 1-tuples so they can be passed straight to
            # executemany() as parameter sets.
            predel = cursor.fetchall()
            if predel:
                cursor.executemany(
                    '''delete from fline_code where hashid = %s''', predel)
                cursor.executemany(
                    '''delete from hash_code where hashid = %s''', predel)
        finally:
            # Switch safe-update mode back on even when a delete failed.
            cursor.execute('''set sql_safe_updates = 1''')
        mconn.commit()
        HSdel = len(predel)
        FileDEL = FileDEL + removed
        logger.info('Execute Del_sql() Success!')
    except Exception:
        logger.exception('Failure Of Del_sql:')
        logger.warning('FAILURE OF DEL_SQL()!')
        # Discard the partial deletion so a later commit cannot persist it.
        try:
            mconn.rollback()
        except Exception:
            logger.exception('Failure Of Del_sql: rollback failed')
def fidAdd_sql(fname, hs, cursor=cursor, mconn=mconn):
    """Store a file whose content hash is not yet in the database.

    Inserts *hs* into ``hash_code``, then a ``fmate_code`` row for *fname* and
    one ``fline_code`` row per entry returned by ``GetContent(fname)``, all
    linked to the new hash id, and commits.  On success the module-level
    ``AddOkCount`` is incremented; on failure the transaction is rolled back,
    ``FailCount`` is incremented and *fname* is appended to ``FailList``.

    Args:
        fname: Full path of the file to add.
        hs: Hash string of the file content (stored as ``strhash``).
        cursor: Database cursor used to run the statements.
        mconn: Connection that is committed or rolled back.

    Raises:
        OSError: If the file's timestamps cannot be read (raised before any
            database work, as in the original).
    """
    global FailCount
    global AddOkCount
    global FailList
    # get fmtime, fctime, fcontent
    logger.debug('get fmtime,fctime,fcontent')
    stamp = '%Y-%m-%d %H:%M:%S'
    fmt = time.strftime(stamp, time.localtime(os.path.getmtime(fname)))
    # The original read getmtime() twice, so fctime was a copy of fmtime.
    fct = time.strftime(stamp, time.localtime(os.path.getctime(fname)))
    fmate = [fname, fmt, fct]
    try:
        lhs = [hs]
        flineargs = GetContent(fname)
        cursor.execute('''use fortest;''')
        cursor.execute('''insert into hash_code( strhash ) values( %s );''',
                       lhs)
        # Keep the new hash id in a session variable for the inserts below.
        cursor.execute('''select last_insert_id() into @lshs;''')
        cursor.execute(
            '''insert into fmate_code( namepath, hashid, fmtime, fctime ) values( %s, @lshs, %s, %s );''',
            fmate)
        cursor.executemany(
            '''insert into fline_code( hashid, line, flid ) values( @lshs, %s, %s )''',
            flineargs)
        mconn.commit()
        AddOkCount = AddOkCount + 1
    except Exception:
        logger.exception('Error msg')
        logger.info('fidAdd_sql--->Failed to write to Sql:\n%r', fname)
        # Undo the partial insert so a later commit cannot persist it.
        try:
            mconn.rollback()
        except Exception:
            logger.exception('fidAdd_sql--->rollback failed')
        # The original assigned to an unrelated local (MailCount), so the
        # failure counter never moved.
        FailCount = FailCount + 1
        FailList.append(fname)
def fidMCR_sql(fname, hs, cursor=cursor, mconn=mconn):
    """Register a moved, copied or renamed file whose content is already stored.

    Looks up the existing ``hash_code`` row whose ``strhash`` equals *hs* and
    inserts a ``fmate_code`` row for *fname* pointing at it, then commits.
    On success the module-level ``McrOkCount`` is incremented; on failure the
    transaction is rolled back, ``FailCount`` is incremented and *fname* is
    appended to ``FailList``.

    Args:
        fname: Full path of the file to register.
        hs: Hash string of the file content.
        cursor: Database cursor used to run the statements.
        mconn: Connection that is committed or rolled back.

    Raises:
        OSError: If the file's timestamps cannot be read (raised before any
            database work, as in the original).
    """
    logger.debug('fidMCR_sql:本函数执行移动复制重命名文件的写入')
    global FailCount
    global McrOkCount
    global FailList
    # get fmtime, fctime, fcontent
    logger.debug('get fmtime,fctime,fcontent')
    stamp = '%Y-%m-%d %H:%M:%S'
    fmt = time.strftime(stamp, time.localtime(os.path.getmtime(fname)))
    # The original read getmtime() twice, so fctime was a copy of fmtime.
    fct = time.strftime(stamp, time.localtime(os.path.getctime(fname)))
    fmate = [fname, fmt, fct]
    lhs = [hs]
    try:
        cursor.execute('''use fortest;''')
        # Keep the matching hash id in a session variable for the insert.
        cursor.execute(
            '''select hashid from hash_code where strhash = %s into @filehs''',
            lhs)
        cursor.execute(
            '''insert into fmate_code( namepath, hashid, fmtime, fctime ) values( %s, @filehs, %s, %s );''',
            fmate)
        mconn.commit()
        logger.info('MCRing --->%r', fname)
        McrOkCount = McrOkCount + 1
    except Exception:
        logger.exception('Error msg')
        logger.info('fidMCR_sql--->Failed to write to Sql:\n%r', fname)
        # Undo the partial insert so a later commit cannot persist it.
        try:
            mconn.rollback()
        except Exception:
            logger.exception('fidMCR_sql--->rollback failed')
        # The original assigned to an unrelated local (MailCount), so the
        # failure counter never moved.
        FailCount = FailCount + 1
        FailList.append(fname)
def importReader(fname, DepID, Rgid, cursor=cursor, miconn=miconn):
    """Import readers from a tab-separated text file into the Reader table.

    Each line of *fname* is split on tabs/newlines; its first two fields are
    stored as ``Name`` and ``Rid``.  Every imported reader gets the given
    *DepID* and *Rgid*, a display id (``idDisp``) that is not yet used inside
    the department, and a random six-digit ``GuestPassWord``.

    Args:
        fname: Path of the text file to import.
        DepID: Department id written to every row and used to look up the
            display ids that are already taken.
        Rgid: Value written to the ``Rgid`` column of every row.
        cursor: Database cursor (``?`` placeholders).
        miconn: Connection that is committed after the insert.

    Raises:
        Exception: Whatever the lookup of existing display ids raises; it is
            logged and re-raised, because continuing without that list could
            hand out display ids that are already in use.
    """
    logger.info('Func ----> importReader( fname, DepID, Rgid, cursor = cursor, miconn = miconn )')
    # Close the file as soon as it has been read (the original leaked it).
    with open(fname, 'r') as fdata:
        flines = fdata.readlines()
    try:
        cursor.execute(
            'select idDisp from Reader where DepID = ? order by idDisp', DepID)
        tidsp = cursor.fetchall()
    except Exception:
        # The original swallowed this and then failed with a NameError on
        # the unbound result; surface the real error instead.
        logger.exception('importReader: failed to read existing idDisp values')
        raise
    if tidsp:
        # Rows are ordered by idDisp, so the last row holds the largest id in
        # use; that many extra candidates guarantee enough free ids remain.
        candidates = set(range(1, len(flines) + tidsp[-1][0] + 1))
        taken = {row[0] for row in tidsp}
        # The original wrote ``idsp.sort`` without calling it, so the free
        # ids were handed out in arbitrary set order.
        idsp = sorted(candidates - taken)
    else:
        idsp = list(range(1, len(flines) + 1))
    logger.info(idsp)
    ts = []
    for cnt, ifl in enumerate(flines):
        tn = re.split('\t|\n', ifl)
        # NOTE(review): ``random`` is not a cryptographic source; confirm
        # that is acceptable for GuestPassWord.
        ttn = tn[:2] + [
            DepID, Rgid, idsp[cnt],
            str(random.randint(100000, 999999))
        ]
        ts.append(tuple(ttn))
    logger.debug(ts)
    cursor.executemany(
        'insert into Reader ( Name, Rid, DepID, Rgid, idDisp, GuestPassWord ) values ( ?, ?, ?, ?, ?, ?)',
        ts)
    miconn.commit()
def getfname(self):
    """Return the full file names recorded in the database.

    Returns:
        A ``(names, count)`` tuple: the set of ``namepath`` values read from
        ``fmate_code`` and the size of that set.
    """
    self.cursor.execute('use fortest')
    self.cursor.execute('select namepath from fmate_code')
    # Each fetched row is a sequence whose first item is the path.
    recorded = {row[0] for row in self.cursor.fetchall()}
    total = len(recorded)
    logger.info('返回数据库记录的完整文件名的集合')
    logger.info('Numbers of file records get from Sql: %r' % total)
    return recorded, total
def getfname(self):
    """Return the full names of the files on disk that should be tracked.

    Walks ``self.path`` recursively and keeps every file whose full path
    passes the suffix test below.

    Returns:
        A ``(names, count)`` tuple: the set of matching full paths and the
        size of that set.
    """
    tracked = set()
    for folder, _subdirs, names in os.walk(self.path):
        for name in names:
            full_path = os.path.join(folder, name)
            # NOTE(review): the second literal is longer than the
            # 8-character slice it is compared with, so that test can never
            # be true; the log message below suggests a ".txt"-style suffix
            # was intended -- confirm the original value.
            if full_path[-3:] == '.py' or full_path[-8:] == '[email protected]':
                tracked.add(full_path)
    total = len(tracked)
    logger.info('返回磁盘上需要追踪的完整文件名的集合')
    logger.info('Numbers of py and txt files get from disk: %r' % total)
    return tracked, total
] Sq.append(urlst[8]) cnt = 0 count_Sq = 1 #Rurl = open( 'Result_JN_2.txt', '+w' ) #Rurl.write( '\n\n' ) S = '' while Sq: url = Sq.popleft() #Vis.add(url) logger.warn(str(len(Vis)) + 'len( Vis ) ' + str(count_Sq)) count_Sq -= 1 #logger.info( ' Already Sipdered:' + str( cnt ) + ' Spidering ----> ' + url ) logger.info('Url Numbers In The Q: ' + str(count_Sq)) if len(url) > 50: logger.info( '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') logger.info(url) continue try: Op = request.urlopen(url, timeout=2) cnt += 1 if 'html' not in Op.getheader('Content-Type'): #sf:log >>>>add logging common logger.debug('NO html:' + url) continue try: Dt_t = Op.read().decode('utf-8')
def lDay(beginDate, endDate, Holiday, Workday, tmp, temp=False):
    """Fill the scratch table AttLendWork with lend/return dates for one term.

    Historical rows of ``Abklendwork`` from every year before the year of
    *beginDate* are shifted forward by a whole number of weeks (and once more
    by one week less) and copied into ``AttLendWork`` when they fall inside
    the requested period.  The pool is then cleaned up:

    1. rows whose ``backdate`` is a weekend day or a holiday are deleted;
    2. every weekend ``lenddate`` that is not a make-up workday is added to
       the holiday list;
    3. for each make-up workday, rows lent on a holiday 1-29 days after it
       are moved onto the workday; if no such rows exist, up to 30 rows are
       taken from the busiest later lend date instead;
    4. rows still lent on a holiday are deleted;
    5. the operator columns are filled in.

    Original design notes (translated): the function is meant to be run for
    half a school year at a time; a status flag reporting malformed dates was
    planned but is not implemented.  Example input for the first half of
    2016-2017: Holiday = 09-15 09-16 09-17 10-01 ... 10-07 12-31 01-01 01-02,
    Workday = 09-18 10-08 10-09.

    Args:
        beginDate: Start of the period as a ``'yyyy-mm-dd'`` string.  Other
            layouts (``'2016-9-2'``, ``'09-02-2016'``) do not work because
            the year and month are taken by slicing.
        endDate: End of the period as a ``'yyyy-mm-dd'`` string.
        Holiday: List of ``'mm-dd'`` strings: public holidays or other
            non-working days inside the period.
        Workday: List of ``'mm-dd'`` strings: make-up working days inside
            the period.
        tmp: When true the operator name is ``'王_00'`` (marks throw-away
            records); otherwise it is ``'王'`` + two-digit year + half flag.
        temp: When true the records are for a mid-term inspection: rows are
            selected by ``lenddate`` instead of ``backdate`` so that loans
            still open at *endDate* are kept, and such rows get ``backdate``
            and ``boperator`` set to NULL at the end.

    Returns:
        None.  Uses the module-level ``cursor`` and commits through it.
    """
    ny = beginDate[:4]
    cursor.execute('SELECT DISTINCT year(lenddate) hisy FROM Abklendwork ORDER BY hisy')
    hisys = cursor.fetchall()
    # Empty the scratch table (it must already exist in the database).
    cursor.execute('TRUNCATE TABLE AttLendWork')
    hisy = [row[0] for row in hisys]

    # With temp the upper bound is applied to lenddate, so books lent before
    # endDate but not yet returned stay in the pool; their backdate and
    # boperator are set to NULL at the very end of this function.
    upper_col = 'lenddate' if temp else 'backdate'
    copy_sql = (
        'INSERT INTO AttLendWork(lenddate, backdate, returndate) '
        'SELECT lenddate + ?, backdate + ?, returndate + ? FROM abklendwork '
        'WHERE lenddate + ? > ? AND ' + upper_col + ' + ? < ?; '
        'INSERT INTO attlendwork(lenddate, backdate, returndate) '
        'SELECT lenddate + ?-7, backdate + ?-7, returndate + ?-7 FROM abklendwork '
        'WHERE lenddate + ?-7 > ? AND ' + upper_col + ' + ?-7 < ?;'
    )
    for hi in hisy:
        if hi < int(ny):
            # Distance between the historical year and the target year in
            # days, as a whole number of weeks so weekdays are preserved.
            cursor.execute('select datediff(wk, ?, ?)*7', str(hi), ny)
            wk = cursor.fetchall()[0][0]
            logger.info(wk)
            cursor.execute(copy_sql, wk, wk, wk, wk, beginDate, wk, endDate,
                           wk, wk, wk, wk, beginDate, wk, endDate)

    # Turn the 'mm-dd' items into full dates.
    holiday = []
    workday = []
    # yHalf: '1' second half-year, '0' first half-year, '_' not set.
    # NOTE(review): the flag follows the LAST item of Holiday only.
    yHalf = '_'
    for hi in Holiday:
        # A term starting in the second half of the year runs past New Year,
        # so January/February dates belong to the following year.
        if int(beginDate[5:7]) > 7 and (hi[:2] in ['01', '02']):
            holiday.append(str(int(ny) + 1) + '-' + hi)
            yHalf = '1'
        else:
            holiday.append(str(int(ny)) + '-' + hi)
            yHalf = '0'
    for wi in Workday:
        if int(beginDate[5:7]) > 7 and (wi[:2] in ['01', '02']):
            workday.append(str(int(ny) + 1) + '-' + wi)
        else:
            workday.append(str(int(ny)) + '-' + wi)

    # Step 1: drop rows returned on a weekend day or on a holiday.
    sql = 'DELETE FROM AttLendWork WHERE datepart(DW, backdate) IN (1,7)'
    cursor.execute(sql)
    rowCount = cursor.rowcount
    for hi in holiday:
        sql = 'DELETE FROM AttLendWork WHERE CONVERT(VARCHAR(100), backdate, 23) = ?'
        cursor.execute(sql, hi)
        logger.info(rowCount)
        rowCount = rowCount + cursor.rowcount
    tt = 'Delete backdate Effect rows:' + str(rowCount)
    logger.info(tt)

    # Step 2: weekend lend dates that are not make-up workdays count as
    # holidays from here on.
    sql = 'SELECT DISTINCT CONVERT(VARCHAR(100), lenddate, 23) FROM AttLendWork WHERE datepart(DW, lenddate) IN (1,7)'
    cursor.execute(sql)
    for li in cursor:
        if li[0] not in workday and li[0] not in holiday:
            holiday.append(li[0])
    logger.info(holiday)

    # Step 3: give every make-up workday some records.
    for wi in workday:
        t = 0
        wcnt = 0
        # Iterate over a snapshot: used-up holidays are removed from the
        # live list inside the loop, and removing from the list being
        # iterated (as the original did) silently skips the next candidate.
        for hi in list(holiday):
            cursor.execute('select datediff(dd,?,?)', wi, hi)
            diff = cursor.fetchone()[0]
            if 0 < diff < 30:
                # Move the holiday's records onto the workday.
                sql = 'UPDATE AttLendWork SET lenddate = ? WHERE CONVERT(VARCHAR(100),lenddate,23) = ?'
                cursor.execute(sql, wi, hi)
                wcnt = wcnt + cursor.rowcount
                logger.debug('本次替换:')
                logger.debug(wcnt)
                holiday.remove(hi)
                if wcnt > 0:
                    t = 1
                if wcnt > 30:
                    logger.debug("替换超过30条,即可结束替换")
                    logger.debug(wcnt)
                    break
        if t == 1:
            cursor.commit()
        else:
            try:
                logger.debug("holiday中不存在合格的替换日期,从普通日期中替换")
                # Busiest lend date after the workday ...
                sql = 'SELECT TOP 1 CONVERT( VARCHAR(100), lenddate, 23) FROM AttLendWork WHERE lenddate > ? GROUP BY lenddate ORDER BY count(lenddate) DESC'
                cursor.execute(sql, wi)
                tli = cursor.fetchone()[0]
                # ... gives up to 30 of its rows to the workday.
                sql = 'UPDATE AttLendWork SET lenddate = ? WHERE did IN (SELECT TOP 30 did FROM AttLendWork WHERE lenddate = ?)'
                cursor.execute(sql, wi, tli)
                logger.debug('本次替换:')
                wcnt = cursor.rowcount
                logger.debug(wcnt)
                if wcnt > 0:
                    cursor.commit()
            except Exception:
                # Typically: no later lend date exists, so fetchone() is None.
                logger.warning('日期 %s 没有合适的替换日期 ' % wi)

    # Step 4: drop whatever is still lent on a holiday.
    logger.debug('删除lenddate在holiday中的记录')
    sql = 'DELETE FROM AttLendWork WHERE CONVERT(VARCHAR(100), lenddate, 23) = ?'
    for hi in holiday:
        cursor.execute(sql, hi)
        logger.debug(cursor.rowcount)
    cursor.commit()

    # Step 5: operator names.
    if tmp:
        toperator = '王' + '_00'
    else:
        toperator = '王' + ny[2:4] + yHalf
    cursor.execute('UPDATE AttLendWork SET loperator = ?, boperator = ? WHERE 1=1', toperator, toperator)
    if temp:
        # Mid-term inspection: loans that end on or after endDate are shown
        # as not yet returned.
        cursor.execute('UPDATE AttLendWork SET backdate = NULL ,boperator = NULL WHERE backdate >= ?', endDate)
    logger.debug(cursor.rowcount)
    cursor.commit()
    logger.info(hisy)
def fRefresh_sql(nfmate, cursor=cursor, mconn=mconn):
    """Refresh the stored metadata and line records of modified files.

    For every entry of *nfmate* the file's ``fmate_code`` row is pointed at
    the right hash (a new ``hash_code`` row is inserted when the two hash
    fields of the entry differ) and its timestamps are updated.  The stored
    lines are then reconciled with ``GetContent(path)``:

    * existing rows are marked provisionally (``fstatus`` 1 -> 7, 0 -> -1);
    * each current line that matches a marked row gets ``fstatus`` 1 again
      (and a new ``flid`` when its position changed);
    * lines without a match are inserted;
    * rows still marked 7 are set to 0 and rows still marked -1 are deleted.

    Successfully refreshed paths are appended to the module-level
    ``FileRefresh``; failures are logged, rolled back and counted in the
    module-level ``FailCount``.

    Args:
        nfmate: Iterable of per-file records.  ``nfi[0]`` is the path,
            ``nfi[3]`` is inserted as the new hash string and compared with
            ``nfi[4]``; ``nfi[-3:-6:-1]`` supplies fctime, fmtime and path.
            Presumably ``(namepath, fmtime, fctime, new_hash, old_hash)`` --
            TODO confirm against the caller.
        cursor: Database cursor used to run the statements.
        mconn: Connection that is committed or rolled back.
    """
    global FailCount
    global FileRefresh
    for nfi in nfmate:
        cursor.execute('''use fortest;''')
        cursor.execute(
            '''select hashid from fmate_code where namepath = %s into @oldhashid''',
            nfi[0:1])
        # How many files currently share the old content hash.
        cursor.execute(
            '''select count(namepath) from fmate_code where hashid = @oldhashid'''
        )
        c_nfi = cursor.fetchall()[0][0]
        logger.debug('c_nfi = %r', c_nfi)
        try:
            flineargs = GetContent(nfi[0])
            # For now the file time decides: the refresh runs even when the
            # hash is unchanged.
            if nfi[3] == nfi[4]:
                cursor.execute('''select @oldhashid into @lshs''')
            else:
                cursor.execute(
                    '''insert into hash_code( strhash ) values( %s )''',
                    nfi[3:4])
                cursor.execute('''select last_insert_id() into @lshs''')
            # Update the file information.
            cursor.execute(
                '''update fmate_code set hashid = @lshs, fctime = %s, fmtime = %s where namepath = %s''',
                nfi[-3:-6:-1])
            mconn.commit()
            # (The original's ``c_nfi > 1 and 0`` branch could never run and
            # has been removed.)
            if c_nfi == 1:
                # Only this file used the old hash: carry its line rows over
                # and drop the old hash row when a new one was created.
                cursor.execute('''update fline_code set hashid = @lshs where hashid = @oldhashid ''')
                if nfi[3] != nfi[4]:
                    cursor.execute(
                        '''delete from hash_code where hashid = @oldhashid''')
            # Mark the existing rows as "not seen yet".
            cursor.execute('''update fline_code set fstatus = 7 where hashid = @lshs and fstatus = 1''')
            cursor.execute('''update fline_code set fstatus = -1 where hashid = @lshs and fstatus = 0''')
            mconn.commit()
            flNew = []
            flidChange = []
            for fline in flineargs:
                cursor.execute(
                    '''select lid, line, flid from fline_code where hashid = @lshs and fstatus in ( 7,-1 ) and line = %s limit 1''',
                    [fline[0]])
                fli_sql = cursor.fetchall()
                if not fli_sql:
                    # Line is not stored yet.
                    flNew.append(fline)
                elif fli_sql[0][2] == fline[1]:
                    # Same text at the same position.
                    cursor.execute(
                        '''update fline_code set fstatus = 1 where lid = %s''',
                        fli_sql[0][:1])
                else:
                    # Same text at a new position: remember (flid, lid).
                    flidChange.append(fline[1:] + (fli_sql[0][0], ))
                    cursor.execute(
                        '''update fline_code set fstatus = 1 where lid = %s''',
                        fli_sql[0][:1])
            cursor.executemany(
                '''insert into fline_code( hashid, line, flid ) values( @lshs, %s, %s )''',
                flNew)
            cursor.executemany(
                '''update fline_code set flid = %s, fstatus = 1 where lid = %s''',
                flidChange)
            # Whatever was not seen again is retired or removed.
            cursor.execute('''update fline_code set fstatus = 0 where hashid = @lshs and fstatus = 7''')
            cursor.execute('''delete from fline_code where hashid = @lshs and fstatus = -1''')
            mconn.commit()
            FileRefresh.append(nfi[0])
            logger.info('Refresh:\t%r', nfi[0])
        except Exception:
            logger.exception('Refresh Failed:\t%s', nfi[0])
            logger.warning('Refresh Failed:\t%s', nfi[0])
            # Drop the uncommitted part of this refresh so the next file's
            # commit cannot persist it.
            try:
                mconn.rollback()
            except Exception:
                logger.exception('Refresh rollback failed:\t%s', nfi[0])
            FailCount += 1
logger.warn('Failed to get hash of preAdd!') HsIsql = FIsql.gethash() for fsh in preadd.items(): """这里要考虑两种情形:1、文件内容已经存在,只是添加fid指向,就是所谓移动复制重命名 #2、文件内容不存在,满足文件名和hash值同时不再数据库中 #上面两种情况应该在数据库外用逻辑区分开,再处理 ##添加fid:意味着向文件要素表和文件信息表写入要添加的文件名所对应的所有信息,并分配fid #这里传递的参数,除了包含路径的完整的文件名,还有相应的信息,我觉得每个文件可以看做一个对象 #是文件class的一个实例。而fidAdd_sql()接收的参数就是每一个文件实例。 """ if fsh[1] in HsIsql: fidMCR_sql(*fsh) else: fidAdd_sql(*fsh) logger.info('fnum_sql: %d', fnum_sql) logger.info('fnum_disk: %d', fnum_disk) logger.info('FailCount: %d', FailCount) logger.info('AddOkCount: %d', AddOkCount) logger.info('McrOkCount: %d', McrOkCount) for Fli in FailList: print(Fli) preDel = f_sql - f_disk Del_sql(preDel) logger.info('HSdel: %d', HSdel) logger.info('DelCount: %d', len(FileDEL)) except: logger.exception('Fail to run codetsql!') """ fh_insql:在数据库中记录的文件名-hash值组合的集合,简称库文件 文件名-hash值组合:是判断独立文件的标志具有唯一性,文件名指的是包含路径的完整文件名
def lDay(beginDate, endDate, Holiday, Workday, tmp):
    """Fill the scratch table AttLendWork with lend/return dates for one term.

    Historical rows of ``Abklendwork`` from every year before the year of
    *beginDate* are shifted forward by a whole number of weeks (and once more
    by one week less) and copied into ``AttLendWork`` when they fall inside
    the requested period.  The pool is then cleaned up:

    1. rows whose ``backdate`` is a weekend day or a holiday are deleted;
    2. every weekend ``lenddate`` that is not a make-up workday is added to
       the holiday list;
    3. for each make-up workday, rows lent on a holiday 1-29 days after it
       are moved onto the workday; if no such rows exist, up to 30 rows are
       taken from the busiest later lend date instead;
    4. rows still lent on a holiday are deleted;
    5. the operator columns are filled in.

    Original design notes (translated): the function is meant to be run for
    half a school year at a time; a status flag reporting malformed dates was
    planned but is not implemented.  Example input for the first half of
    2016-2017: Holiday = 09-15 09-16 09-17 10-01 ... 10-07 12-31 01-01 01-02,
    Workday = 09-18 10-08 10-09.

    Args:
        beginDate: Start of the period as a ``'yyyy-mm-dd'`` string.  Other
            layouts (``'2016-9-2'``, ``'09-02-2016'``) do not work because
            the year and month are taken by slicing.
        endDate: End of the period as a ``'yyyy-mm-dd'`` string.
        Holiday: List of ``'mm-dd'`` strings: public holidays or other
            non-working days inside the period.
        Workday: List of ``'mm-dd'`` strings: make-up working days inside
            the period.
        tmp: When true the operator name is ``'王_00'`` (marks throw-away
            records); otherwise it is ``'王'`` + two-digit year + half flag.

    Returns:
        None.  Uses the module-level ``cursor`` and commits through it.
    """
    ny = beginDate[:4]
    cursor.execute(
        'select distinct year(lenddate) hisy from Abklendwork order by hisy')
    hisys = cursor.fetchall()
    # Empty the scratch table (it must already exist in the database).
    cursor.execute('truncate table AttLendWork')
    hisy = [row[0] for row in hisys]

    copy_sql = (
        'insert into AttLendWork(lenddate, backdate, returndate) '
        'select lenddate + ?, backdate + ?, returndate + ? from abklendwork '
        'where lenddate + ? > ? and backdate + ? < ?; '
        'insert into attlendwork(lenddate, backdate, returndate) '
        'select lenddate + ?-7, backdate + ?-7, returndate + ?-7 from abklendwork '
        'where lenddate + ?-7 > ? and backdate + ?-7 < ?;'
    )
    for hi in hisy:
        if hi < int(ny):
            # Distance between the historical year and the target year in
            # days, as a whole number of weeks so weekdays are preserved.
            cursor.execute('select datediff(wk, ?, ?)*7', str(hi), ny)
            wk = cursor.fetchall()[0][0]
            logger.info(wk)
            cursor.execute(copy_sql, wk, wk, wk, wk, beginDate, wk, endDate,
                           wk, wk, wk, wk, beginDate, wk, endDate)

    # Turn the 'mm-dd' items into full dates.
    holiday = []
    workday = []
    # yHalf: '1' second half-year, '0' first half-year, '_' not set.
    # NOTE(review): the flag follows the LAST item of Holiday only.
    yHalf = '_'
    for hi in Holiday:
        # A term starting in the second half of the year runs past New Year,
        # so January/February dates belong to the following year.
        if int(beginDate[5:7]) > 7 and (hi[:2] in ['01', '02']):
            holiday.append(str(int(ny) + 1) + '-' + hi)
            yHalf = '1'
        else:
            holiday.append(str(int(ny)) + '-' + hi)
            yHalf = '0'
    for wi in Workday:
        if int(beginDate[5:7]) > 7 and (wi[:2] in ['01', '02']):
            workday.append(str(int(ny) + 1) + '-' + wi)
        else:
            workday.append(str(int(ny)) + '-' + wi)

    # Step 1: drop rows returned on a weekend day or on a holiday.
    sql = 'delete from AttLendWork where datepart(dw, backdate) in (1,7)'
    cursor.execute(sql)
    rowCount = cursor.rowcount
    for hi in holiday:
        sql = 'delete from AttLendWork where CONVERT(varchar(100), backdate, 23) = ?'
        cursor.execute(sql, hi)
        logger.info(rowCount)
        rowCount = rowCount + cursor.rowcount
    tt = 'Delete backdate Effect rows:' + str(rowCount)
    logger.info(tt)

    # Step 2: weekend lend dates that are not make-up workdays count as
    # holidays from here on.
    sql = 'select distinct CONVERT(varchar(100), lenddate, 23) from AttLendWork where datepart(dw, lenddate) in (1,7)'
    cursor.execute(sql)
    for li in cursor:
        if li[0] not in workday and li[0] not in holiday:
            holiday.append(li[0])
    logger.info(holiday)

    # Step 3: give every make-up workday some records.
    for wi in workday:
        t = 0
        wcnt = 0
        # Iterate over a snapshot: used-up holidays are removed from the
        # live list inside the loop, and removing from the list being
        # iterated (as the original did) silently skips the next candidate.
        for hi in list(holiday):
            cursor.execute('select datediff(dd,?,?)', wi, hi)
            diff = cursor.fetchone()[0]
            if 0 < diff < 30:
                # Move the holiday's records onto the workday.
                sql = 'update AttLendWork set lenddate = ? where CONVERT(varchar(100),lenddate,23) = ?'
                cursor.execute(sql, wi, hi)
                wcnt = wcnt + cursor.rowcount
                logger.debug('本次替换:')
                logger.debug(wcnt)
                holiday.remove(hi)
                if wcnt > 0:
                    t = 1
                if wcnt > 30:
                    logger.debug("替换超过30条,即可结束替换")
                    logger.debug(wcnt)
                    break
        if t == 1:
            cursor.commit()
        else:
            try:
                logger.debug("holiday中不存在合格的替换日期,从普通日期中替换")
                # Busiest lend date after the workday ...
                sql = 'select top 1 CONVERT( varchar(100), lenddate, 23) from AttLendWork where lenddate > ? group by lenddate order by count(lenddate) desc'
                cursor.execute(sql, wi)
                tli = cursor.fetchone()[0]
                # ... gives up to 30 of its rows to the workday.
                sql = 'update AttLendWork set lenddate = ? where did in (select top 30 did from AttLendWork where lenddate = ?)'
                cursor.execute(sql, wi, tli)
                logger.debug('本次替换:')
                wcnt = cursor.rowcount
                logger.debug(wcnt)
                if wcnt > 0:
                    cursor.commit()
            except Exception:
                # Typically: no later lend date exists, so fetchone() is None.
                logger.warning('日期 %s 没有合适的替换日期 ' % wi)

    # Step 4: drop whatever is still lent on a holiday.
    logger.debug('删除lenddate在holiday中的记录')
    sql = 'delete from AttLendWork where CONVERT(varchar(100), lenddate, 23) = ?'
    for hi in holiday:
        cursor.execute(sql, hi)
        logger.debug(cursor.rowcount)
    cursor.commit()

    # Step 5: operator names.
    if tmp:
        toperator = '王' + '_00'
    else:
        toperator = '王' + ny[2:4] + yHalf
    cursor.execute(
        'update AttLendWork set loperator = ?, boperator = ? where 1=1',
        toperator, toperator)
    logger.debug(cursor.rowcount)
    cursor.commit()
    logger.info(hisy)
def gener(beginDate, endDate, Holiday, Workday, yearBefore, times=4, Reader='All', tmp=False):
    """Generate lending records for one term and insert them into LendWork.

    Readers of each grade are paired at random with books fetched by
    ``Bid`` for the grade they attended *yearBefore* years ago, capped at a
    per-grade average number of loans.  ``lDay`` then builds the date pool in
    ``AttLendWork`` and every reader/book pair is matched with one date row.

    Args:
        beginDate: Start of the period, ``'yyyy-mm-dd'``.
        endDate: End of the period, ``'yyyy-mm-dd'``.
        Holiday: List of ``'mm-dd'`` holidays, passed to ``lDay``.
        Workday: List of ``'mm-dd'`` make-up workdays, passed to ``lDay``.
        yearBefore: 0 for the current school year, 1 for the previous one,
            and so on; must be an ``int`` in ``0..5``.
        times: Average number of loans per student (currently unused).
        Reader: Reserved (unused).
        tmp: Passed to ``lDay``.

    Returns:
        ``None`` when the records were written; the ``gStat`` status list
        when *yearBefore* is invalid; ``False`` when a grade does not have
        more candidate books than readers.
    """
    # Check the type first: comparing a non-number with 5 would raise before
    # the type test is ever reached.
    if type(yearBefore) is not int or yearBefore > 5 or yearBefore < 0:
        gStat = [
            'yearBefore:如果生成本学年的,值为0,如果生成上个学年的借阅记录,值为1,上上个学年,值为2,依次类推',
            'yearBefore参数错误', False
        ]
        logger.warning(gStat)
        return gStat

    # Grade label and average loans per student.  Without this cap a grade
    # that borrowed a lot in the past would get an ever larger share:
    # total = sum(readers in grade * per-grade average).
    tGrade = [['一', 2.5], ['二', 3.5], ['三', 4], ['四', 4.5], ['五', 5.5],
              ['六', 4]]
    if yearBefore == 0:
        tGrid = tGbid = tGrade
    else:
        # Today's grade k readers were in grade k - yearBefore back then, so
        # their books come from that lower grade; readers who were in the
        # top grades have graduated and are left out.
        tGrid = tGrade[yearBefore:]
        tGbid = tGrade[:-yearBefore]

    ttRidBid = []
    for tgr, tgb in myzip(tGrid, tGbid):
        rid = Rid(tgr[0])
        rL = len(rid)
        if rL == 0:
            # No readers in this grade: skip it instead of dividing by zero.
            logger.warning('@@@未找到任何 %s年级 读者。' % tgr[0])
            continue
        bid = Bid(tgb[0])
        bL = len(bid)
        if bL <= rL:
            return False
        # Repeat the readers until there is at least one per book, then
        # shuffle both sides and cut the readers down to the book count.
        rid = rid * (bL // rL + 1)
        random.shuffle(rid)
        random.shuffle(bid)
        rid = rid[:bL]
        if bL > rL * tgb[1]:
            bL = round(rL * tgb[1])
        # assumes each Bid() item is a list -- TODO confirm
        ttRidBid.extend(book + [reader]
                        for book, reader in zip(bid[:bL], rid))
    random.shuffle(ttRidBid)

    logger.debug('生成日期条目:')
    lDay(beginDate, endDate, Holiday, Workday, tmp)

    # Matching rule: every date row whose lend date occurs fewer than 50
    # times is used (so rare dates are not lost); the remaining pairs get
    # randomly chosen date rows.
    try:
        sql = 'select count(lenddate) cn,CONVERT(varchar(100), lenddate, 23) lenddate into #t1 from AttLendWork group by lenddate'
        cursor.execute(sql)
    except Exception:
        # Presumably #t1 already exists on this connection: refill it.
        cursor.execute('truncate table #t1')
        sql = 'insert into #t1 select count(lenddate) cn,CONVERT(varchar(100), lenddate, 23) lenddate from AttLendWork group by lenddate'
        cursor.execute(sql)
    sql = 'select did from AttLendWork where CONVERT(varchar(100), lenddate, 23) in (select lenddate from #t1 where cn < 50)'
    cursor.execute(sql)
    Adid = {row[0] for row in cursor.fetchall()}
    tnum = len(ttRidBid) - len(Adid)
    if tnum > 0:
        # Draw three times the number needed at random, then trim.
        sql = 'select top %s did from AttLendWork order by checksum(newid())' % (
            str(tnum * 3))
        cursor.execute(sql)
        Bdid = {row[0] for row in cursor.fetchall()}
        Cdid = list(Bdid - Adid)
        random.shuffle(Cdid)
        # All date rows that will be used.
        Ddid = Cdid[:tnum] + list(Adid)
    else:
        Ddid = list(Adid)
    Ddid.sort()

    if len(Ddid) < len(ttRidBid):
        # The date pool is smaller than the number of pairs; the original
        # raised IndexError here.  Use what is available.
        logger.warning('gener: only %s date rows for %s reader/book pairs',
                       len(Ddid), len(ttRidBid))
    tLendWork = [tuple(pair + [did]) for pair, did in zip(ttRidBid, Ddid)]

    logger.info('预备有 %s 条记录写入LendWork' % len(tLendWork))
    sql = 'insert into LendWork( bid, Bcid, Rid, LendDate, ReturnDate, BackDate, loperator, boperator ) select ?, ?, ?, LendDate, ReturnDate, BackDate, loperator, boperator from AttLendWork where did = ?'
    if tLendWork:
        # executemany() must not be called with an empty parameter list.
        cursor.executemany(sql, tLendWork)
    cursor.commit()
    logger.info('Success!')
def gener(beginDate, endDate, Holiday, Workday, yearBefore, times=5, Reader='All', tmp=False, temp=False, tempNum=0):
    """Generate lending records for one term and insert them into LendWork.

    Readers of each grade are paired at random with books fetched by
    ``Bid`` for the grade they attended *yearBefore* years ago, capped at a
    per-grade average number of loans.  ``lDay`` then builds the date pool in
    ``AttLendWork`` and every reader/book pair is matched with one date row.

    Known issue (from the original notes): a student may end up with two
    loans in overlapping periods; a final filtering pass could fix that.

    Args:
        beginDate: Start of the period, usually the start of term,
            ``'yyyy-mm-dd'``.
        endDate: End of the period, ``'yyyy-mm-dd'``.
        Holiday: List of ``'mm-dd'`` public holidays inside the period;
            differs from year to year.
        Workday: List of ``'mm-dd'`` make-up workdays inside the period.
        yearBefore: 0 generates records for the current school year (grades
            do not change within a school year); n generates records for the
            n-th year before it.  Must be an ``int`` in ``0..5``.
        times: Average number of records per student for the period; 4 per
            term is typical.  The per-grade averages are scaled by
            ``times / 4``.
        Reader: Reserved (unused).
        tmp: Passed to ``lDay``.  Per the original notes, set it for test
            runs or mid-term inspections: the records then get the operator
            ``王_00`` and can be removed afterwards with
            ``delete from LendWork where loperator='王_00'`` (single quotes:
            double quotes or backticks would be read as a column name).
        temp: Mid-term inspection mode, passed to ``lDay`` (which leaves some
            books unreturned); the generated records are additionally
            shuffled and cut down to *tempNum*.
        tempNum: Number of records to keep when *temp* is true, e.g. 960 for
            an inspection six weeks into the term.

    Returns:
        ``None`` when the records were written; the ``gStat`` status list
        when *yearBefore* is invalid; ``False`` when a grade does not have
        more candidate books than readers.
    """
    # Parameter range check.  The type is tested first: comparing a
    # non-number with 5 would raise before the type test is ever reached.
    if type(yearBefore) is not int or yearBefore > 5 or yearBefore < 0:
        gStat = [
            'yearBefore:如果生成本学年的,值为0,如果生成上个学年的借阅记录,值为1,上上个学年,值为2,依次类推',
            'yearBefore参数错误', False
        ]
        logger.warning(gStat)
        return gStat

    # tGrade: grade label and average loans per student.  Without this cap
    # a grade that borrowed a lot in the past would get an ever larger
    # share: total = sum(readers in grade * per-grade average).
    times = times / 4
    tGrade = [['一', 2.5 * times], ['二', 3.5 * times], ['三', 4 * times],
              ['四', 4.5 * times], ['五', 5.5 * times], ['六', 4 * times]]
    if yearBefore == 0:
        tGrid = tGbid = tGrade
    else:
        # With yearBefore == 1, tGrid is grades 2..6 and tGbid is grades
        # 1..5: readers who are in grades 2..6 today were in grades 1..5 a
        # year ago, so they are paired with books of those lower grades.
        tGrid = tGrade[yearBefore:]
        tGbid = tGrade[:-yearBefore]

    ttRidBid = []
    for tgR, tgB in myzip(tGrid, tGbid):
        # Readers to generate records for.
        rid = Rid(tgR[0])
        rL = len(rid)
        if rL == 0:
            logger.warning('@@@未找到任何 %s年级 读者。' % tgR[0])
            continue
        # Books for the grade these readers attended in the target period.
        bid = Bid(tgB[0])
        bL = len(bid)
        if bL <= rL:
            return False
        # Repeat the readers until there is at least one per book, then
        # shuffle both sides and cut the readers down to the book count.
        rid = rid * (bL // rL + 1)
        random.shuffle(rid)
        random.shuffle(bid)
        rid = rid[:bL]
        # Cap the number of pairs at readers * per-grade average.
        if bL > rL * tgB[1]:
            bL = round(rL * tgB[1])
        # assumes each Bid() item is a list -- TODO confirm
        ttRidBid.extend(book + [reader]
                        for book, reader in zip(bid[:bL], rid))
    random.shuffle(ttRidBid)

    logger.debug('生成日期条目:')
    lDay(beginDate, endDate, Holiday, Workday, tmp, temp)

    # Matching rule: every date row whose lend date occurs fewer than 50
    # times is used (so rare dates are not lost); the remaining pairs get
    # randomly chosen date rows.
    try:
        sql = 'SELECT count(lenddate) cn,CONVERT(VARCHAR(100), lenddate, 23) lenddate INTO #t1 FROM AttLendWork GROUP BY lenddate'
        cursor.execute(sql)
    except Exception:
        # Presumably #t1 already exists on this connection: refill it.
        cursor.execute('TRUNCATE TABLE #t1')
        sql = 'INSERT INTO #t1 SELECT count(lenddate) cn,CONVERT(VARCHAR(100), lenddate, 23) lenddate FROM AttLendWork GROUP BY lenddate'
        cursor.execute(sql)
    sql = 'SELECT did FROM AttLendWork WHERE CONVERT(VARCHAR(100), lenddate, 23) IN (SELECT lenddate FROM #t1 WHERE cn < 50)'
    cursor.execute(sql)
    Adid = {row[0] for row in cursor.fetchall()}
    tnum = len(ttRidBid) - len(Adid)
    if tnum > 0:
        # Draw three times the number needed at random, then trim.
        sql = 'select top %s did from AttLendWork order by checksum(newid())' % (
            str(tnum * 3))
        cursor.execute(sql)
        Bdid = {row[0] for row in cursor.fetchall()}
        Cdid = list(Bdid - Adid)
        random.shuffle(Cdid)
        # All date rows that will be used.
        Ddid = Cdid[:tnum] + list(Adid)
    else:
        Ddid = list(Adid)
    Ddid.sort()

    if len(Ddid) < len(ttRidBid):
        # The date pool is smaller than the number of pairs; the original
        # raised IndexError here.  Use what is available.
        logger.warning('gener: only %s date rows for %s reader/book pairs',
                       len(Ddid), len(ttRidBid))
    tLendWork = [tuple(pair + [did]) for pair, did in zip(ttRidBid, Ddid)]

    # Mid-term inspection: keep only tempNum randomly chosen records.
    if temp:
        random.shuffle(tLendWork)
        tLendWork = tLendWork[:tempNum]

    logger.info('预备有 %s 条记录写入LendWork' % len(tLendWork))
    sql = 'INSERT INTO LendWork( bid, Bcid, Rid, LendDate, ReturnDate, BackDate, loperator, boperator ) SELECT ?, ?, ?, LendDate, ReturnDate, BackDate, loperator, boperator FROM AttLendWork WHERE did = ?'
    if tLendWork:
        # executemany() must not be called with an empty parameter list
        # (which temp=True with the default tempNum=0 produces).
        cursor.executemany(sql, tLendWork)
    cursor.commit()
    logger.info('Success!')