# Example 1 (score: 0)
def index(url):
	response.content_type = 'application/json; charset=UTF-8'
	print"\n\n\n\n\n--------------------------------\n--- Getting Creation dates for:\n"+url+"\n\n"

	bitly = getBitlyCreationDate(url)
	print "Done Bitly"
	archives = getArchivesCreationDate(url)
	print "Done Archives"
	topsy = getTopsyCreationDate(url)
	print "Done Topsy"
	google = getGoogleCreationDate(url)
	print "Done Google"
	backlink = getBacklinksFirstAppearanceDates(url)
	print "Done Backlinks"
	lastmodified = getLastModifiedDate(url)
	print "Done Last Modified"
	lowest = getLowest([bitly,topsy,google,backlink,lastmodified,archives["Earliest"]])
	print "Got Lowest"

	result = []
	result.append(("URI", url))
	result.append(("Estimated Creation Date", lowest))
	result.append(("Last Modified", lastmodified))
	result.append(("Bitly.com", bitly))
	result.append(("Topsy.com", topsy))
	result.append(("Backlinks", backlink))
	result.append(("Google.com", google))
	result.append(("Archives", archives))

	values = OrderedDict(result)

	r = jsonlib.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))
	print r
	return r
# Example 2 (score: 0)
def carbonDate(url):
    print "\n\n\n\n\n--------------------------------\n--- Getting Creation dates for:\n" + url + "\n\n"

    bitly = getBitlyCreationDate(url)
    print "Done Bitly"
    archives = getArchivesCreationDate(url)
    print "Done Archives"
    topsy = getTopsyCreationDate(url)
    print "Done Topsy"
    google = getGoogleCreationDate(url)
    print "Done Google"
    backlink = getBacklinksFirstAppearanceDates(url)
    print "Done Backlinks"
    lastmodified = getLastModifiedDate(url)
    print "Done Last Modified"
    lowest = getLowest(
        [bitly, topsy, google, backlink, lastmodified, archives["Earliest"]])
    print "Got Lowest"

    result = []
    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    result.append(("Last Modified", lastmodified))
    result.append(("Bitly.com", bitly))
    result.append(("Topsy.com", topsy))
    result.append(("Backlinks", backlink))
    result.append(("Google.com", google))
    result.append(("Archives", archives))
    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))
    print r
    return lowest
# Example 3 (score: 0)
def getGoogleCreationDate(url):
	inurl_creation_date = ""
	try:
		query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:'+url+'&oq=inurl:'+url
		page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"')
		loc = 0	
		lowest_date = 99999999999
		while(True):
			start_str = 'class="f std" >'
			loc = page.find(start_str,loc)
			fin = page.find("</span>", loc)
			if(loc==-1):
				break
			timestamp = page[loc+len(start_str):fin]
			epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))

			limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
			if(epoch<limitEpoch):
				continue

			if(epoch<lowest_date):
				lowest_date = epoch
			inurl_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date))
			loc = fin
	except:
		pass

	search_creation_date = ""
	try:
		query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q='+url
		page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"')
		loc = 0	
		lowest_date = 99999999999
		while(True):
			start_str = 'class="f std" >'
			loc = page.find(start_str,loc)
			fin = page.find("</span>", loc)
			if(loc==-1):
				break
			timestamp = page[loc+len(start_str):fin]
			epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))

			limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
			if(epoch<limitEpoch):
				continue

			if(epoch<lowest_date):
				lowest_date = epoch
			search_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date))
			loc = fin
	except:
		pass

	return getLowest([search_creation_date,inurl_creation_date])
# Example 4 (score: 0)
def getBacklinksCreationDates(url):
    links = getBacklinks(url)
    backlinks = []
    try:
        for link in links:
            bitly = getBitlyCreationDate(link)
            archives = getArchivesCreationDate(link)
            topsy = getTopsyCreationDate(link)
            google = getGoogleCreationDate(link)
            lowest = getLowest([bitly, topsy, google, archives["Earliest"]])
            if (lowest == ""):
                continue
            backlinks.append(lowest)

    except:
        print sys.exc_info()
    return backlinks
def getBacklinksCreationDates(url):
	"""Return a list with the earliest known creation date of each backlink
	of *url*; backlinks for which no source yields a date are skipped.

	NOTE(review): this is a byte-for-byte duplicate of the definition
	directly above (only the indentation style differs); being later in
	the file, this one wins at import time.
	"""
	links = getBacklinks(url)
	backlinks = []
	try:
		for link in links:
			# Probe each dating source for this backlink.
			bitly = getBitlyCreationDate(link)
			archives = getArchivesCreationDate(link)
			topsy = getTopsyCreationDate(link)
			google = getGoogleCreationDate(link)
			lowest = getLowest([bitly,topsy,google,archives["Earliest"]])
			if(lowest==""):
				continue
			backlinks.append(lowest)

	except:
		# Best effort: report the failure, keep whatever was collected.
		print sys.exc_info()
	return backlinks
# Example 6 (score: 0)
def index(url):
    response.content_type = 'application/json; charset=UTF-8'
    print "\n--- Getting Creation dates for:\n" + url + "\n"

    bitly = getBitlyCreationDate(url)
    print "Done Bitly"
    archives = getArchivesCreationDate(url)
    print "Done Archives"
    topsy = getTopsyCreationDate(url)
    print "Done Topsy"
    google = getGoogleCreationDate(url)
    print "Done Google"
    backlink = getBacklinksFirstAppearanceDates(url)
    print "Done Backlinks"
    lastmodified = getLastModifiedDate(url)
    print "Done Last Modified"
    lowest = getLowest(
        [bitly, topsy, google, backlink, lastmodified, archives["Earliest"]])
    print "Got Lowest"

    result = []
    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    result.append(("Last Modified", lastmodified))
    result.append(("Bitly.com", bitly))
    result.append(("Topsy.com", topsy))
    result.append(("Backlinks", backlink))
    result.append(("Google.com", google))
    result.append(("Archives", archives))
    values = OrderedDict(result)
    #Corren: changed json call to simplejson due to runtime error
    r = simplejson.dumps(values,
                         sort_keys=False,
                         indent=2,
                         separators=(',', ': '))
    print r
    #Corren: extract the just desired element
    createDate = values['Estimated Creation Date']
    return createDate
# Example 7 (score: 0)
def cd(url, backlinksFlag=False):
    print "Getting Creation dates for: " + url

    threads = []
    outputArray = ["", "", "", "", "", ""]
    now0 = datetime.datetime.now()

    lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))

    if backlinksFlag:
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))

    topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)
    threads.append(archivesThread)

    if backlinksFlag:
        threads.append(backlinkThread)

    threads.append(topsyThread)

    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if backlinksFlag:
        backlinkThread.start()

    topsyThread.start()

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1]
    google = outputArray[2]
    archives = outputArray[3]

    if backlinksFlag:
        backlink = outputArray[4]
    else:
        backlink = ""

    topsy = outputArray[5]

    # note that archives["Earliest"] = archives[0][1]
    try:
        lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy])  # for thread
    except:
        print sys.exc_type, sys.exc_value, sys.exc_traceback

    result = []

    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    result.append(("Last Modified", lastmodified))
    result.append(("Bitly.com", bitly))
    result.append(("Topsy.com", topsy))
    result.append(("Backlinks", backlink))
    result.append(("Google.com", google))
    result.append(("Archives", archives))
    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": "))

    now1 = datetime.datetime.now() - now0

    # print "runtime in seconds: "
    # print now1.seconds
    # print r
    # print 'runtime in seconds:  ' +  str(now1.seconds) + '\n' + r + '\n'
    saveFile = open("A2_Q3.txt", "a")
    saveFile.write("{:<20} {} ".format(lowest, url))
    saveFile.write("\n")
    saveFile.close()
    return r
def getGoogleCreationDate(url, outputArray, indexOfOutputArray):
	inurl_creation_date = ""
	try:
		query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:'+url+'&oq=inurl:'+url
		page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"')

		
		
		signatureString = ' - </span>'
		locationOfSignature = 0
		lowest_date = 99999999999
		while(True):
			

			

			#retrieve date from preceding " - </span>" signature - start
			#this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>"
			locationOfSignature = page.find(signatureString, locationOfSignature)
			locationOfSignature
			timestamp = ''
			
			
			if locationOfSignature != -1:
				k = locationOfSignature
				while 1==1 and k > -1:
					#end marker
    					if page[k] != '>' :
						timestamp = page[k] + timestamp
					else :
						break
					k = k - 1;
				locationOfSignature = locationOfSignature + len(signatureString)
				
				timestamp = timestamp.strip()
			else :
				break
			#retrieve date from preceding " - </span>" signature - end
			
			#print ""
			#print "timestamp: " + timestamp


			epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))
		

			limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
			if(epoch<limitEpoch):
				continue
			
			if(epoch<lowest_date):
				lowest_date = epoch
			inurl_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date))
			
	except:
		pass

	search_creation_date = ""
	try:
		query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q='+url
		page = commands.getoutput('curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'+query+'"')

		
		signatureString = ' - </span>'
		locationOfSignature = 0
		lowest_date = 99999999999
		while(True):

			
			
			#firstaid, not permanent fix
			#retrieve date from preceding " - </span>" signature - start
			#this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>"
			locationOfSignature = page.find(signatureString, locationOfSignature)
			timestamp = ''
			
			
			if locationOfSignature != -1:
				k = locationOfSignature
				while 1==1 and k > -1:
    					if page[k] != '>' :
						timestamp = page[k] + timestamp
					else :
						break
					k = k - 1;
				locationOfSignature = locationOfSignature + len(signatureString)

				timestamp = timestamp.strip()
			else :
				break
			#retrieve date from preceding " - </span>" signature - end
			
			#print ""
			#print "timestamp: " + timestamp
			
			epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))

			limitEpoch = int(calendar.timegm(time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
			if(epoch<limitEpoch):
				continue

			if(epoch<lowest_date):
				lowest_date = epoch
			search_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S', time.gmtime(lowest_date))
			
	except:
		pass
	
	lowerDate = getLowest([search_creation_date,inurl_creation_date])
	outputArray[indexOfOutputArray] = lowerDate
	print "Done Google"
	return lowerDate
# Example 9 (score: 0)
def cd(url, backlinksFlag=False):
    print 'Getting Creation dates for: ' + url

    threads = []
    outputArray = ['', '', '', '', '', '']
    now0 = datetime.datetime.now()

    lastmodifiedThread = Thread(target=getLastModifiedDate,
                                args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate,
                         args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate,
                          args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate,
                            args=(url, outputArray, 3))

    if (backlinksFlag):
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates,
                                args=(url, outputArray, 4))

    topsyThread = Thread(target=getTopsyCreationDate,
                         args=(url, outputArray, 5))

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)
    threads.append(archivesThread)

    if (backlinksFlag):
        threads.append(backlinkThread)

    threads.append(topsyThread)

    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if (backlinksFlag):
        backlinkThread.start()

    topsyThread.start()

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1]
    google = outputArray[2]
    archives = outputArray[3]

    if (backlinksFlag):
        backlink = outputArray[4]
    else:
        backlink = ''

    topsy = outputArray[5]

    #note that archives["Earliest"] = archives[0][1]
    try:
        lowest = getLowest(
            [lastmodified, bitly, google, archives[0][1], backlink,
             topsy])  #for thread
    except:
        print sys.exc_type, sys.exc_value, sys.exc_traceback

    result = []

    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    result.append(("Last Modified", lastmodified))
    result.append(("Bitly.com", bitly))
    result.append(("Topsy.com", topsy))
    result.append(("Backlinks", backlink))
    result.append(("Google.com", google))
    result.append(("Archives", archives))
    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))

    now1 = datetime.datetime.now() - now0

    #print "runtime in seconds: "
    #print now1.seconds
    #print r
    #print 'runtime in seconds:  ' +  str(now1.seconds) + '\n' + r + '\n'
    saveFile = open('links_output_4.txt', 'a')
    saveFile.write("{:<20} {} ".format(lowest, url))
    saveFile.write('\n')
    saveFile.close()
    return r
# Example 10 (score: 0)
def cd(url, backlinksFlag = False):

    #print 'Getting Creation dates for: ' + url


    #scheme missing?
    parsedUrl = urlparse.urlparse(url)
    if( len(parsedUrl.scheme)<1 ):
        url = 'http://'+url
    
    
    threads = []
    outputArray =['','','','','','']
    now0 = datetime.datetime.now()
    
   
    lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))
    
    if( backlinksFlag ):
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))

    #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))
    

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)	
    threads.append(archivesThread)

    if( backlinksFlag ):
        threads.append(backlinkThread)

    #threads.append(topsyThread)	

    
    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if( backlinksFlag ):
        backlinkThread.start()

    #topsyThread.start()

    
    # Wait for all threads to complete
    for t in threads:
        t.join()
        
    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1] 
    google = outputArray[2] 
    archives = outputArray[3] 
    
    if( backlinksFlag ):
        backlink = outputArray[4]
    else:
        backlink = ''

    #topsy = outputArray[5]  
    
    #note that archives["Earliest"] = archives[0][1]
    try:
        #lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread
        lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink]) #for thread
    except:
       print sys.exc_type, sys.exc_value , sys.exc_traceback

    
    
    result = []
    
    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    result.append(("Last Modified", lastmodified))
    result.append(("Bitly.com", bitly))
    result.append(("Topsy.com", "Topsy is out of service"))
    result.append(("Backlinks", backlink))
    result.append(("Google.com", google))
    result.append(("Archives", archives))
    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))
    
    now1 = datetime.datetime.now() - now0

    
    #print "runtime in seconds: " 
    #print now1.seconds
    #print r
    print 'runtime in seconds:  ' +  str(now1.seconds) + '\n' + r + '\n'

    return r
# Example 11 (score: 0)
def getGoogleCreationDate(url, outputArray, indexOfOutputArray):
    inurl_creation_date = ""
    try:
        query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=inurl:' + url + '&oq=inurl:' + url
        page = commands.getoutput(
            'curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'
            + query + '"')

        signatureString = ' - </span>'
        locationOfSignature = 0
        lowest_date = 99999999999
        while (True):

            #retrieve date from preceding " - </span>" signature - start
            #this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>"
            locationOfSignature = page.find(signatureString,
                                            locationOfSignature)
            locationOfSignature
            timestamp = ''

            if locationOfSignature != -1:
                k = locationOfSignature
                while 1 == 1 and k > -1:
                    #end marker
                    if page[k] != '>':
                        timestamp = page[k] + timestamp
                    else:
                        break
                    k = k - 1
                locationOfSignature = locationOfSignature + len(
                    signatureString)

                timestamp = timestamp.strip()
            else:
                break
            #retrieve date from preceding " - </span>" signature - end

            #print ""
            #print "timestamp: " + timestamp

            epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))

            limitEpoch = int(
                calendar.timegm(
                    time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
            if (epoch < limitEpoch):
                continue

            if (epoch < lowest_date):
                lowest_date = epoch
            inurl_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S',
                                                time.gmtime(lowest_date))

    except:
        pass

    search_creation_date = ""
    try:
        query = 'https://www.google.com/search?hl=en&tbo=d&tbs=qdr:y15&q=' + url
        page = commands.getoutput(
            'curl --silent -L -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30" "'
            + query + '"')

        signatureString = ' - </span>'
        locationOfSignature = 0
        lowest_date = 99999999999
        while (True):

            #firstaid, not permanent fix
            #retrieve date from preceding " - </span>" signature - start
            #this logic is meant to retrieve date from a string of form: ">DateIsHere- </span>"
            locationOfSignature = page.find(signatureString,
                                            locationOfSignature)
            timestamp = ''

            if locationOfSignature != -1:
                k = locationOfSignature
                while 1 == 1 and k > -1:
                    if page[k] != '>':
                        timestamp = page[k] + timestamp
                    else:
                        break
                    k = k - 1
                locationOfSignature = locationOfSignature + len(
                    signatureString)

                timestamp = timestamp.strip()
            else:
                break
            #retrieve date from preceding " - </span>" signature - end

            #print ""
            #print "timestamp: " + timestamp

            epoch = int(calendar.timegm(time.strptime(timestamp, '%b %d, %Y')))

            limitEpoch = int(
                calendar.timegm(
                    time.strptime("1995-01-01T12:00:00", '%Y-%m-%dT%H:%M:%S')))
            if (epoch < limitEpoch):
                continue

            if (epoch < lowest_date):
                lowest_date = epoch
            search_creation_date = time.strftime('%Y-%m-%dT%H:%M:%S',
                                                 time.gmtime(lowest_date))

    except:
        pass

    lowerDate = getLowest([search_creation_date, inurl_creation_date])
    outputArray[indexOfOutputArray] = lowerDate
    print "Done Google"
    return lowerDate
# Example 12 (score: 0)
def cd(url, backlinksFlag=False):

    # print 'Getting Creation dates for: ' + url
    # scheme missing?
    parsedUrl = urlparse.urlparse(url)
    if len(parsedUrl.scheme) < 1:
        url = "http://" + url
    threads = []
    outputArray = ["", "", "", "", "", ""]
    now0 = datetime.datetime.now()

    lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))

    if backlinksFlag:
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))

    # topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)
    threads.append(archivesThread)

    if backlinksFlag:
        threads.append(backlinkThread)

    # threads.append(topsyThread)

    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if backlinksFlag:
        backlinkThread.start()

    # topsyThread.start()

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1]
    google = outputArray[2]
    archives = outputArray[3]

    if backlinksFlag:
        backlink = outputArray[4]
    else:
        backlink = ""

    # topsy = outputArray[5]

    # note that archives["Earliest"] = archives[0][1]
    try:
        # lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread
        lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink])  # for thread
    except:
        print sys.exc_type, sys.exc_value, sys.exc_traceback

    result = []

    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))
    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": "))

    now1 = datetime.datetime.now() - now0

    return r
# Example 13 (score: 0)
    def cd(self, url):

        if len(url) < 1:
            return "Url length less than 1"

        # scheme missing?
        parsedUrl = urlparse.urlparse(url)
        if len(parsedUrl.scheme) < 1:
            url = "http://" + url

        response = cherrypy.response
        response.headers["Content-Type"] = "application/json"

        print "Getting Creation dates for: " + url

        threads = []
        outputArray = ["", "", "", "", "", ""]
        now0 = datetime.datetime.now()

        lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
        bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
        googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
        archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))
        topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))

        # Add threads to thread list
        threads.append(lastmodifiedThread)
        threads.append(bitlyThread)
        threads.append(googleThread)
        threads.append(archivesThread)
        threads.append(backlinkThread)
        threads.append(topsyThread)

        # Start new Threads
        lastmodifiedThread.start()
        bitlyThread.start()
        googleThread.start()
        archivesThread.start()
        backlinkThread.start()
        topsyThread.start()

        # Wait for all threads to complete
        for t in threads:
            t.join()

        # For threads
        lastmodified = outputArray[0]
        bitly = outputArray[1]
        google = outputArray[2]
        archives = outputArray[3]
        backlink = outputArray[4]
        topsy = outputArray[5]

        # note that archives["Earliest"] = archives[0][1]
        try:
            lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy])  # for thread
        except:
            print sys.exc_type, sys.exc_value, sys.exc_traceback

        result = []

        result.append(("URI", url))
        result.append(("Estimated Creation Date", lowest))
        result.append(("Last Modified", lastmodified))
        result.append(("Bitly.com", bitly))
        result.append(("Topsy.com", topsy))
        result.append(("Backlinks", backlink))
        result.append(("Google.com", google))
        result.append(("Archives", archives))
        values = OrderedDict(result)
        r = json.dumps(values, sort_keys=False, indent=2, separators=(",", ": "))

        now1 = datetime.datetime.now() - now0

        # print "runtime in seconds: "
        # print now1.seconds
        # print r
        print "runtime in seconds:  " + str(now1.seconds) + "\n" + r + "\n"
        return r
# Example 14 (score: 0)
def cd(url, backlinksFlag=False):

    #print 'Getting Creation dates for: ' + url

    #scheme missing?
    parsedUrl = urlparse.urlparse(url)
    if (len(parsedUrl.scheme) < 1):
        url = 'http://' + url

    threads = []
    outputArray = ['', '', '', '', '', '']
    now0 = datetime.datetime.now()

    lastmodifiedThread = Thread(target=getLastModifiedDate,
                                args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate,
                         args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate,
                          args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate,
                            args=(url, outputArray, 3))

    if (backlinksFlag):
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates,
                                args=(url, outputArray, 4))

    #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)
    threads.append(archivesThread)

    if (backlinksFlag):
        threads.append(backlinkThread)

    #threads.append(topsyThread)

    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if (backlinksFlag):
        backlinkThread.start()

    #topsyThread.start()

    # Wait for all threads to complete
    for t in threads:
        t.join()

    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1]
    google = outputArray[2]
    archives = outputArray[3]

    if (backlinksFlag):
        backlink = outputArray[4]
    else:
        backlink = ''

    #topsy = outputArray[5]

    #note that archives["Earliest"] = archives[0][1]
    try:
        #lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread
        lowest = getLowest(
            [lastmodified, bitly, google, archives[0][1],
             backlink])  #for thread
    except:
        print sys.exc_type, sys.exc_value, sys.exc_traceback

    result = []

    result.append(("URI", url))
    result.append(("Estimated Creation Date", lowest))

    values = OrderedDict(result)
    r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))

    now1 = datetime.datetime.now() - now0

    #print "runtime in seconds: "
    #print now1.seconds
    #print r
    print 'runtime in seconds:  ' + str(now1.seconds) + '\n' + r + '\n'

    return r
# Example 15 (score: 0)
    def cd(self, url):

        if(len(url) < 1):
            return "Url length less than 1"

        response = cherrypy.response
        response.headers['Content-Type'] = 'application/json'

        print 'Getting Creation dates for: ' + url

        threads = []
        outputArray =['','','','','','']
        now0 = datetime.datetime.now()
        
       
        lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
        bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
        googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
        archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))
        topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))
        

        # Add threads to thread list
        threads.append(lastmodifiedThread)
        threads.append(bitlyThread)
        threads.append(googleThread)	
        threads.append(archivesThread)
        threads.append(backlinkThread)
        threads.append(topsyThread)	

        
        # Start new Threads
        lastmodifiedThread.start()
        bitlyThread.start()
        googleThread.start()
        archivesThread.start()
        backlinkThread.start()
        topsyThread.start()

        
        # Wait for all threads to complete
        for t in threads:
            t.join()
            
        # For threads
        lastmodified = outputArray[0]
        bitly = outputArray[1] 
        google = outputArray[2] 
        archives = outputArray[3] 
        backlink = outputArray[4]
        topsy = outputArray[5]  
        
        #note that archives["Earliest"] = archives[0][1]
        try:
            lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink, topsy]) #for thread
        except:
           print sys.exc_type, sys.exc_value , sys.exc_traceback
        
        

        result = []
        
        result.append(("URI", url))
        result.append(("Estimated Creation Date", lowest))
        result.append(("Last Modified", lastmodified))
        result.append(("Bitly.com", bitly))
        result.append(("Topsy.com", topsy))
        result.append(("Backlinks", backlink))
        result.append(("Google.com", google))
        result.append(("Archives", archives))
        values = OrderedDict(result)
        r = json.dumps(values, sort_keys=False, indent=2, separators=(',', ': '))
        
        now1 = datetime.datetime.now() - now0

        
        #print "runtime in seconds: " 
        #print now1.seconds
        #print r
        print 'runtime in seconds:  ' +  str(now1.seconds) + '\n' + r + '\n'
        return r
# Example 16 (score: 0)
def cd(url, backlinksFlag = False):

    #print 'Getting Creation dates for: ' + url


    #scheme missing?
    parsedUrl = urlparse.urlparse(url)
    if( len(parsedUrl.scheme)<1 ):
        url = 'http://'+url
    
    
    threads = []
    outputArray =['','','','','','']
    now0 = datetime.datetime.now()
    
   
    lastmodifiedThread = Thread(target=getLastModifiedDate, args=(url, outputArray, 0))
    bitlyThread = Thread(target=getBitlyCreationDate, args=(url, outputArray, 1))
    googleThread = Thread(target=getGoogleCreationDate, args=(url, outputArray, 2))
    archivesThread = Thread(target=getArchivesCreationDate, args=(url, outputArray, 3))
    
    if( backlinksFlag ):
        backlinkThread = Thread(target=getBacklinksFirstAppearanceDates, args=(url, outputArray, 4))

    #topsyThread = Thread(target=getTopsyCreationDate, args=(url, outputArray, 5))
    

    # Add threads to thread list
    threads.append(lastmodifiedThread)
    threads.append(bitlyThread)
    threads.append(googleThread)	
    threads.append(archivesThread)

    if( backlinksFlag ):
        threads.append(backlinkThread)

    #threads.append(topsyThread)	

    
    # Start new Threads
    lastmodifiedThread.start()
    bitlyThread.start()
    googleThread.start()
    archivesThread.start()

    if( backlinksFlag ):
        backlinkThread.start()

    #topsyThread.start()

    
    # Wait for all threads to complete
    for t in threads:
        t.join()
        
    # For threads
    lastmodified = outputArray[0]
    bitly = outputArray[1] 
    google = outputArray[2] 
    archives = outputArray[3] 
    
    if( backlinksFlag ):
        backlink = outputArray[4]
    else:
        backlink = ''

   
    
  
    try:

        lowest = getLowest([lastmodified, bitly, google, archives[0][1], backlink]) #for thread
    except:
       print sys.exc_type, sys.exc_value , sys.exc_traceback

    
    
 
    
    file2=open('dates.csv','a')
    print lowest
    file2.write("%s\n"% lowest)