def index(url): response.content_type = 'application/json; charset=UTF-8' print"\n\n\n\n\n--------------------------------\n--- Getting Creation dates for:\n"+url+"\n\n" bitly = getBitlyCreationDate(url) print "Done Bitly" archives = getArchivesCreationDate(url) print "Done Archives" topsy = getTopsyCreationDate(url) print "Done Topsy" google = getGoogleCreationDate(url) print "Done Google" backlink = getBacklinksFirstAppearanceDates(url) print "Done Backlinks" lastmodified = getLastModifiedDate(url) print "Done Last Modified" lowest = getLowest([bitly,topsy,google,backlink,lastmodified,archives["Earliest"]]) print "Got Lowest" result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = jsonlib.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) print r return r
def carbonDate(url): print "\n\n\n\n\n--------------------------------\n--- Getting Creation dates for:\n" + url + "\n\n" bitly = getBitlyCreationDate(url) print "Done Bitly" archives = getArchivesCreationDate(url) print "Done Archives" topsy = getTopsyCreationDate(url) print "Done Topsy" google = getGoogleCreationDate(url) print "Done Google" backlink = getBacklinksFirstAppearanceDates(url) print "Done Backlinks" lastmodified = getLastModifiedDate(url) print "Done Last Modified" lowest = getLowest( [bitly, topsy, google, backlink, lastmodified, archives["Earliest"]]) print "Got Lowest" result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) print r return lowest
def getGoogleCreationDate(url):
    """Scrape Google search results for the earliest date shown for `url`.

    Runs two searches (an `inurl:` query and a plain query), extracts every
    "Mon DD, YYYY" date following the 'class="f std" >' marker in the result
    HTML, and returns the lowest date found as '%Y-%m-%dT%H:%M:%S' ("" when
    nothing usable was found).

    FIX: the two copy-pasted scrape loops are now one helper; the closing
    </span> is only searched for after the marker is actually found; a single
    unparseable timestamp no longer aborts the whole scrape (the original's
    blanket try/except gave up on the first bad snippet).
    """

    def scrapeLowest(query):
        # Best-effort scrape of one results page; any hard failure yields "".
        try:
            page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "' + query + '"')
            start_str = 'class="f std" >'
            # Reject implausible dates earlier than 1995 (pre-web-archive era).
            limitEpoch = int(calendar.timegm(
                time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
            loc = 0
            lowest_date = 99999999999
            creation_date = ""
            while True:
                loc = page.find(start_str, loc)
                if loc == -1:
                    break
                fin = page.find("</span>", loc)
                timestamp = page[loc + len(start_str):fin]
                loc = fin
                try:
                    epoch = int(calendar.timegm(
                        time.strptime(timestamp, '%b %d, %Y')))
                except ValueError:
                    continue  # snippet was not a date; keep scanning
                if epoch < limitEpoch:
                    continue
                if epoch < lowest_date:
                    lowest_date = epoch
                    creation_date = time.strftime('%Y-%m-%dT%H:%M:%S',
                                                  time.gmtime(lowest_date))
            return creation_date
        except Exception:
            return ""

    inurl_creation_date = scrapeLowest(
        'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:'
        + url + '&oq=inurl:' + url)
    search_creation_date = scrapeLowest(
        'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=' + url)
    return getLowest([search_creation_date, inurl_creation_date])
def getBacklinksCreationDates(url): links = getBacklinks(url) backlinks = [] try: for link in links: bitly = getBitlyCreationDate(link) archives = getArchivesCreationDate(link) topsy = getTopsyCreationDate(link) google = getGoogleCreationDate(link) lowest = getLowest([bitly, topsy, google, archives["Earliest"]]) if (lowest == ""): continue backlinks.append(lowest) except: print sys.exc_info() return backlinks
def getBacklinksCreationDates(url): links = getBacklinks(url) backlinks = [] try: for link in links: bitly = getBitlyCreationDate(link) archives = getArchivesCreationDate(link) topsy = getTopsyCreationDate(link) google = getGoogleCreationDate(link) lowest = getLowest([bitly,topsy,google,archives["Earliest"]]) if(lowest==""): continue backlinks.append(lowest) except: print sys.exc_info() return backlinks
def index(url): response.content_type = 'application/json; charset=UTF-8' print "\n--- Getting Creation dates for:\n" + url + "\n" bitly = getBitlyCreationDate(url) print "Done Bitly" archives = getArchivesCreationDate(url) print "Done Archives" topsy = getTopsyCreationDate(url) print "Done Topsy" google = getGoogleCreationDate(url) print "Done Google" backlink = getBacklinksFirstAppearanceDates(url) print "Done Backlinks" lastmodified = getLastModifiedDate(url) print "Done Last Modified" lowest = getLowest( [bitly, topsy, google, backlink, lastmodified, archives["Earliest"]]) print "Got Lowest" result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) #Corren: changed json call to simplejson due to runtime error r = simplejson.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) print r #Corren: extract the just desired element createDate = values['Estimated Creation Date'] return createDate
def cd(url, backlinksFlag=False): print "Getting Creation dates for: " + url threads = [] outputArray = ["", "", "", "", "", ""] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if backlinksFlag: backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if backlinksFlag: threads.append(backlinkThread) threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if backlinksFlag: backlinkThread.start() topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if backlinksFlag: backlink = outputArray[4] else: backlink = "" topsy = outputArray[5] # note that archives["Earliest"] = archives[0][1] try: lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) # for thread except: print sys.exc_type, sys.exc_value, sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": ")) now1 = 
datetime.datetime.now() - now0 # print "runtime in seconds: " # print now1.seconds # print r # print 'runtime in seconds: ' + str(now1.seconds) + '\n' + r + '\n' saveFile = open("A2_Q3.txt", "a") saveFile.write("{:<20} {} ".format(lowest, url)) saveFile.write("\n") saveFile.close() return r
def getGoogleCreationDate(url, outputArray, indexOfOutputArray): inurl_creation_date = "" try: query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:'+url+'&oq=inurl:'+url page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"') signatureString = ' - </span>' locationOfSignature = 0 lowest_date = 99999999999 while(True): #retrieve date from preceding " - </span>" signature - start #this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>" locationOfSignature = page.find(signatureString, locationOfSignature) locationOfSignature timestamp = '' if locationOfSignature != -1: k = locationOfSignature while 1==1 and k > -1: #end marker if page[k] != '>' : timestamp = page[k] + timestamp else : break k = k - 1; locationOfSignature = locationOfSignature + len(signatureString) timestamp = timestamp.strip() else : break #retrieve date from preceding " - </span>" signature - end #print "" #print "timestamp: " + timestamp epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y'))) limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S'))) if(epoch<limitEpoch): continue if(epoch<lowest_date): lowest_date = epoch inurl_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date)) except: pass search_creation_date = "" try: query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q='+url page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"') signatureString = ' - </span>' locationOfSignature = 0 lowest_date = 99999999999 while(True): #firstaid, not permanent fix #retrieve date from preceding " - </span>" signature - start #this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>" locationOfSignature = 
page.find(signatureString, locationOfSignature) timestamp = '' if locationOfSignature != -1: k = locationOfSignature while 1==1 and k > -1: if page[k] != '>' : timestamp = page[k] + timestamp else : break k = k - 1; locationOfSignature = locationOfSignature + len(signatureString) timestamp = timestamp.strip() else : break #retrieve date from preceding " - </span>" signature - end #print "" #print "timestamp: " + timestamp epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y'))) limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S'))) if(epoch<limitEpoch): continue if(epoch<lowest_date): lowest_date = epoch search_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date)) except: pass lowerDate = getLowest([search_creation_date,inurl_creation_date]) outputArray[indexOfOutputArray] = lowerDate print "Done Google" return lowerDate
def cd(url, backlinksFlag=False): print 'Getting Creation dates for: ' + url threads = [] outputArray = ['', '', '', '', '', ''] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if (backlinksFlag): backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if (backlinksFlag): threads.append(backlinkThread) threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if (backlinksFlag): backlinkThread.start() topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if (backlinksFlag): backlink = outputArray[4] else: backlink = '' topsy = outputArray[5] #note that archives["Earliest"] = archives[0][1] try: lowest = getLowest( [lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread except: print sys.exc_type, sys.exc_value, sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) now1 = 
datetime.datetime.now() - now0 #print "runtime in seconds: " #print now1.seconds #print r #print 'runtime in seconds: ' + str(now1.seconds) + '\n' + r + '\n' saveFile = open('links_output_4.txt', 'a') saveFile.write("{:<20} {} ".format(lowest, url)) saveFile.write('\n') saveFile.close() return r
def cd(url, backlinksFlag = False): #print 'Getting Creation dates for: ' + url #scheme missing? parsedUrl = urlparse.urlparse(url) if( len(parsedUrl.scheme)<1 ): url = 'http://'+url threads = [] outputArray =['','','','','',''] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if( backlinksFlag ): backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if( backlinksFlag ): threads.append(backlinkThread) #threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if( backlinksFlag ): backlinkThread.start() #topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if( backlinksFlag ): backlink = outputArray[4] else: backlink = '' #topsy = outputArray[5] #note that archives["Earliest"] = archives[0][1] try: #lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink]) #for thread except: print sys.exc_type, sys.exc_value , sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", "Topsy is out of service")) 
result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) now1 = datetime.datetime.now() - now0 #print "runtime in seconds: " #print now1.seconds #print r print 'runtime in seconds: ' + str(now1.seconds) + '\n' + r + '\n' return r
def getGoogleCreationDate(url, outputArray, indexOfOutputArray): inurl_creation_date = "" try: query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:' + url + '&oq=inurl:' + url page = commands.getoutput( 'curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "' + query + '"') signatureString = ' - </span>' locationOfSignature = 0 lowest_date = 99999999999 while (True): #retrieve date from preceding " - </span>" signature - start #this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>" locationOfSignature = page.find(signatureString, locationOfSignature) locationOfSignature timestamp = '' if locationOfSignature != -1: k = locationOfSignature while 1 == 1 and k > -1: #end marker if page[k] != '>': timestamp = page[k] + timestamp else: break k = k - 1 locationOfSignature = locationOfSignature + len( signatureString) timestamp = timestamp.strip() else: break #retrieve date from preceding " - </span>" signature - end #print "" #print "timestamp: " + timestamp epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y'))) limitEpoch = int( calendar.timegm( time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S'))) if (epoch < limitEpoch): continue if (epoch < lowest_date): lowest_date = epoch inurl_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date)) except: pass search_creation_date = "" try: query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=' + url page = commands.getoutput( 'curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "' + query + '"') signatureString = ' - </span>' locationOfSignature = 0 lowest_date = 99999999999 while (True): #firstaid, not permanent fix #retrieve date from preceding " - </span>" signature - start #this logic is meant to retrieve date from a string of form: ">DateIsHere- 
</span>" locationOfSignature = page.find(signatureString, locationOfSignature) timestamp = '' if locationOfSignature != -1: k = locationOfSignature while 1 == 1 and k > -1: if page[k] != '>': timestamp = page[k] + timestamp else: break k = k - 1 locationOfSignature = locationOfSignature + len( signatureString) timestamp = timestamp.strip() else: break #retrieve date from preceding " - </span>" signature - end #print "" #print "timestamp: " + timestamp epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y'))) limitEpoch = int( calendar.timegm( time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S'))) if (epoch < limitEpoch): continue if (epoch < lowest_date): lowest_date = epoch search_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date)) except: pass lowerDate = getLowest([search_creation_date, inurl_creation_date]) outputArray[indexOfOutputArray] = lowerDate print "Done Google" return lowerDate
def cd(url, backlinksFlag=False): # print 'Getting Creation dates for: ' + url # scheme missing? parsedUrl = urlparse.urlparse(url) if len(parsedUrl.scheme) < 1: url = "http://" + url threads = [] outputArray = ["", "", "", "", "", ""] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if backlinksFlag: backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) # topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if backlinksFlag: threads.append(backlinkThread) # threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if backlinksFlag: backlinkThread.start() # topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if backlinksFlag: backlink = outputArray[4] else: backlink = "" # topsy = outputArray[5] # note that archives["Earliest"] = archives[0][1] try: # lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink]) # for thread except: print sys.exc_type, sys.exc_value, sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": ")) now1 = datetime.datetime.now() - now0 return r
def cd(self, url): if len(url) < 1: return "Url length less than 1" # scheme missing? parsedUrl = urlparse.urlparse(url) if len(parsedUrl.scheme) < 1: url = "http://" + url response = cherrypy.response response.headers["Content-Type"] = "application/json" print "Getting Creation dates for: " + url threads = [] outputArray = ["", "", "", "", "", ""] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) threads.append(backlinkThread) threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() backlinkThread.start() topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] backlink = outputArray[4] topsy = outputArray[5] # note that archives["Earliest"] = archives[0][1] try: lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) # for thread except: print sys.exc_type, sys.exc_value, sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", 
archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": ")) now1 = datetime.datetime.now() - now0 # print "runtime in seconds: " # print now1.seconds # print r print "runtime in seconds: " + str(now1.seconds) + "\n" + r + "\n" return r
def cd(url, backlinksFlag=False): #print 'Getting Creation dates for: ' + url #scheme missing? parsedUrl = urlparse.urlparse(url) if (len(parsedUrl.scheme) < 1): url = 'http://' + url threads = [] outputArray = ['', '', '', '', '', ''] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if (backlinksFlag): backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if (backlinksFlag): threads.append(backlinkThread) #threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if (backlinksFlag): backlinkThread.start() #topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if (backlinksFlag): backlink = outputArray[4] else: backlink = '' #topsy = outputArray[5] #note that archives["Earliest"] = archives[0][1] try: #lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread lowest = getLowest( [lastmodified, bitly, google, archives[0][1], backlink]) #for thread except: print sys.exc_type, sys.exc_value, sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': ')) now1 = datetime.datetime.now() - now0 #print 
"runtime in seconds: " #print now1.seconds #print r print 'runtime in seconds: ' + str(now1.seconds) + '\n' + r + '\n' return r
def cd(self, url): if(len(url) < 1): return "Url length less than 1" response = cherrypy.response response.headers['Content-Type'] = 'application/json' print 'Getting Creation dates for: ' + url threads = [] outputArray =['','','','','',''] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) threads.append(backlinkThread) threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() backlinkThread.start() topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] backlink = outputArray[4] topsy = outputArray[5] #note that archives["Earliest"] = archives[0][1] try: lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread except: print sys.exc_type, sys.exc_value , sys.exc_traceback result = [] result.append(("URI", url)) result.append(("Estimated Creation Date", lowest)) result.append(("Last Modified", lastmodified)) result.append(("Bitly.com", bitly)) result.append(("Topsy.com", topsy)) result.append(("Backlinks", backlink)) result.append(("Google.com", google)) result.append(("Archives", archives)) values = OrderedDict(result) r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': 
')) now1 = datetime.datetime.now() - now0 #print "runtime in seconds: " #print now1.seconds #print r print 'runtime in seconds: ' + str(now1.seconds) + '\n' + r + '\n' return r
def cd(url, backlinksFlag = False): #print 'Getting Creation dates for: ' + url #scheme missing? parsedUrl = urlparse.urlparse(url) if( len(parsedUrl.scheme)<1 ): url = 'http://'+url threads = [] outputArray =['','','','','',''] now0 = datetime.datetime.now() lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0)) bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1)) googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2)) archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3)) if( backlinksFlag ): backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4)) #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5)) # Add threads to thread list threads.append(lastmodifiedThread) threads.append(bitlyThread) threads.append(googleThread) threads.append(archivesThread) if( backlinksFlag ): threads.append(backlinkThread) #threads.append(topsyThread) # Start new Threads lastmodifiedThread.start() bitlyThread.start() googleThread.start() archivesThread.start() if( backlinksFlag ): backlinkThread.start() #topsyThread.start() # Wait for all threads to complete for t in threads: t.join() # For threads lastmodified = outputArray[0] bitly = outputArray[1] google = outputArray[2] archives = outputArray[3] if( backlinksFlag ): backlink = outputArray[4] else: backlink = '' try: lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink]) #for thread except: print sys.exc_type, sys.exc_value , sys.exc_traceback file2=open('dates.csv','a') print lowest file2.write("%s\n"% lowest)