def linksExt(page, url):
    """Extract every anchor on *page* as ``[text, href, last_modified]``.

    The page is scanned once, left to right, for the literal marker
    ``a href``.  For each occurrence the href is the text between the next
    pair of double quotes, and the link text is whatever lies between the
    ``>`` that follows the href and the next ``<``.

    Text and href are taken from the same anchor in a single pass, so they
    can never be paired with the wrong partner (two independent scans drift
    apart as soon as the marker shows up inside link text).

    Args:
        page: HTML source of the page, as a string.
        url: Not used by the extraction; kept so existing callers that pass
            it keep working.

    Returns:
        A list of ``[link_text, link_href, date]`` lists in document order,
        where ``date`` is whatever ``mod.chkDate(link_href)`` returns.
        Pages without anchors yield an empty list.
    """
    marker = "a href"
    links = []
    pos = 0
    while True:
        anchor = page.find(marker, pos)
        if anchor == -1:
            break

        # The href value sits between the first two double quotes that
        # follow the marker.
        quote_open = page.find('"', anchor)
        if quote_open == -1:
            break  # no quoted href left on the page
        quote_close = page.find('"', quote_open + 1)
        if quote_close == -1:
            break  # href is cut off; nothing usable follows

        # Look for the end of the tag only after the href, so a '>' inside
        # the URL is not mistaken for the tag end.
        tag_close = page.find('>', quote_close)
        if tag_close == -1:
            break  # tag never closes; nothing usable follows

        text_end = page.find('<', tag_close)
        if text_end == -1:
            # Link text runs to the end of the page; keep all of it rather
            # than slicing with -1 and losing the last character.
            text_end = len(page)

        href = page[quote_open + 1:quote_close]
        text = page[tag_close + 1:text_end]
        # mod.chkDate supplies the last-modified date for the href
        links.append([text, href, mod.chkDate(href)])

        # text_end is always past the current marker, so the scan advances.
        pos = text_end
    return links
# Query the stored links, compare each one's recorded date with the date
# mod.chkDate reports now, and print the links whose dates differ.
# print is written as print(<single value>) so the output is identical
# under Python 2 while the statement also parses under Python 3.
rows = []  # stays empty if the query fails, so the scan below is skipped
try:
    # Execute the SQL command
    cursor.execute(sql)
    # Fetch all the rows in a list of lists.
    rows = cursor.fetchall()
    print(rows)
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    print("Error: unable to fecth data")

lst = []
for row in rows:
    # row[0] is handed to mod.chkDate; row[1] is the value it is compared to.
    date = mod.chkDate(row[0])  # GET THE LAST MODIFIED DATE OF WEBPAGE
    # A falsy date is skipped; only a differing date marks the link as stale.
    if date and date != row[1]:
        # append: index assignment into the empty list raised IndexError
        lst.append(row[0])

print('The links needed to be updated are:\n')
print(lst)
#---------------------------------------------------------------------------------------------------------------
# DISCONNECT FROM SERVER
db.commit()
db.close()
# Fetch the stored links, ask mod.chkDate for each link's current date and
# print every link whose current date differs from the stored one.
# Each print takes one parenthesised value, which prints the same text on
# Python 2 and is also valid syntax on Python 3.
rows = []  # default for a failed query; the loop below then does nothing
try:
    # Execute the SQL command
    cursor.execute(sql)
    # Fetch all the rows in a list of lists.
    rows = cursor.fetchall()
    print(rows)
except Exception:
    # Narrower than a bare except: interpreter-exit signals are not swallowed.
    print("Error: unable to fecth data")

lst = []
for row in rows:
    # Column 0 goes to mod.chkDate; column 1 is what its result is checked against.
    date = mod.chkDate(row[0])  # GET THE LAST MODIFIED DATE OF WEBPAGE
    if not date:
        continue  # no date reported for this link; leave it out
    if date != row[1]:
        # lst starts empty, so lst[j] = ... could never work; append instead
        lst.append(row[0])

print('The links needed to be updated are:\n')
print(lst)
#---------------------------------------------------------------------------------------------------------------
# DISCONNECT FROM SERVER
db.commit()
db.close()