def getdate(uri):
    """Look up the creation date reported for a URI.

    The URI is stripped of surrounding whitespace, handed to ``cd`` and the
    text that comes back is parsed as JSON.

    Returns:
        ``(uri, creation_date)`` -- with the stripped URI -- when the parsed
        response holds a truthy value under the ``ECD`` key, otherwise
        ``None``.
    """
    uri = uri.strip()
    print('Searching for uri: {}'.format(uri))

    response = json.loads(cd(uri))
    creation_date = response[ECD]

    # Guard clause: nothing usable was reported for this URI.
    if not creation_date:
        print('Found no ECD')
        return None

    print('Found creation date: {}'.format(creation_date))
    return (uri, creation_date)
# =====================================================================
# Walk through the 1000 URLs.
# The URLs are meant to come from Twitter (see 'Problem 1'), so they are
# simply read from the file 'urls.csv'.
# ---------------------------------------------------------------------
age_mementos = []      # one [memento count, age in days] entry per qualifying URL
no_memento = 0         # number of URLs that turned out to have no mementos
no_creation_date = 0   # set up here; nothing in this section changes it

with open('urls.csv', 'rb') as csv_file:
    for fields in csv.reader(csv_file):
        address = fields[0]

        # 'cd' is the function defined in 'local.py'; its output is JSON text
        report = json.loads(local.cd(address, True))
        created = report['Estimated Creation Date']
        # presumably the memento list for this URL -- TODO confirm the
        # layout of the 'Archives' entry
        memento_count = len(report['Archives'][1][1])

        if memento_count == 0:
            no_memento += 1
        elif created != "":
            # the date arrives as (unicode) text, so parse it into a
            # datetime before doing arithmetic on it
            created_at = datetime.strptime(created, '%Y-%m-%dT%H:%M:%S')
            days_old = (datetime.now() - created_at).days
            # remember the pair for later use
            age_mementos.append([memento_count, days_old])
import local with open("links.txt") as f: ListOfLinks = f.readlines() fOut = open("DateEstimation.txt", "w") i = 0 for link in ListOfLinks: #get URI uri = link.split(" ") print uri[2], " ", uri[3] i+=1 maybe = local.cd(link) maybe = maybe.split('Estimated Creation Date": "') maybe = str(maybe[1]) maybe = maybe.split('"') print "\n", i, ": ", uri[2] ," ", uri[3] , " " ,maybe[0] fOut.write(str(i)) fOut.write(": ") fOut.write(uri[2]) fOut.write(" ") fOut.write(uri[3]) fOut.write(" ") if maybe[0] != "": fOut.write(maybe[0]) else: fOut.write("NODATE") fOut.write("\n")
import datetime
import json

from local import cd

# Single reference instant, so every age is measured against the same "now".
time_now = datetime.datetime.now()
counter = 0

# Context managers close both files even if a lookup, a JSON parse or an
# int() conversion fails part-way through the input.
with open("../../momento.list") as data:
    with open('../../cd.list', 'w') as outputf:
        for line in data:
            # Tab-separated input: column 0 is passed to cd(), column 1 is
            # read as the memento count.
            col = line.split('\t')
            memento = int(col[1])

            if memento > 0:
                report = json.loads(cd(col[0]))
                estimated = report['Estimated Creation Date']
                if estimated != '':
                    createTime = datetime.datetime.strptime(
                        estimated, '%Y-%m-%dT%H:%M:%S')
                    deltaTime = time_now - createTime
                    # Output row: age in days <TAB> memento count.
                    outputf.write(str(deltaTime.days) + "\t"
                                  + str(col[1]).strip() + "\n")

            # Progress indicator.  counter / 10.0 is a true percentage only
            # when the input holds 1000 lines.
            # NOTE(review): counted once per input line -- confirm against
            # the intended nesting.
            counter = counter + 1
            print(str(counter / 10.0) + "%")