def main():
    """Index the composite images by method and site, then run ``temporalPairs``.

    Reads the accepted URIs from ``gooduris_20160225.txt``, selects the
    composite files that pass the name filter, groups them by method (the
    text before the first underscore of the file name) and site (the text
    between the first and the last underscore), builds one
    ``MethodCompThums`` per method and hands the three of them to
    ``temporalPairs``.
    """
    screenshot_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots"  # args["ip"]
    composite_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"  # args["cp"]

    with open("gooduris_20160225.txt", "r") as uri_file:
        good_uris = [entry.rstrip("\n") for entry in uri_file]

    def is_wanted(fname):
        # Skip the "allTheSame" composites before consulting the URI list.
        if "allTheSame" in fname:
            return False
        return check_if_goodURI(fname, good_uris) and "interval" not in fname

    composite_files = get_files(composite_dir, is_wanted)

    by_method = defaultdict(dict)
    for fname in sorted(composite_files):
        site_name = fname[fname.find("_") + 1:fname.rfind("_")]
        method_name = fname[:fname.index("_")]
        by_method[method_name][site_name] = fname

    # composite_only_histogram(method_composites,compath)
    thumbs = get_and_process_thumbs(screenshot_dir, by_method, good_uris)
    print(type(thumbs))
    # print(method_composites)

    screenshot_prefix = screenshot_dir + "/"
    methods = {
        label: MethodCompThums(label, screenshot_prefix, thumbs[label])
        for label in ('random', 'temporalInterval', 'alSum')
    }

    # thumbThumbAnalysis(methods['alSum'], methods['random'], methods['temporalInterval'])
    temporalPairs(methods['alSum'], methods['random'], methods['temporalInterval'])
def colorAnalysis():
    """Compute the dominant composite colours per method and dump them.

    Reads the accepted URIs from ``gooduris_20160225.txt``, groups the
    composite files by method and site, builds one ``MethodCompThums`` per
    method and calls ``get_composite_dom_colors()`` on each. The collected
    results are serialised to ``colorResults2.json`` and then printed site
    by site.

    The JSON text is produced *before* the output file is opened, so a
    serialisation ``TypeError`` is reported without truncating an existing
    ``colorResults2.json``.
    """
    impath = "/home/john/wsdlims_ripped/ECIR2016TurkData/screenshots"  # args["ip"]
    compath = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"  # args["cp"]

    with open("gooduris_20160225.txt", "r") as read:
        goodUris = [line.rstrip("\n") for line in read]

    compisits = get_files(compath,
                          lambda f: "allTheSame" not in f and check_if_goodURI(f, goodUris) and "interval" not in f)

    # File names look like <method>_<site>_<suffix>: the method is the text
    # before the first underscore, the site the text between the first and
    # the last underscore.
    method_composites = defaultdict(dict)
    for comp in sorted(compisits):
        site = comp[comp.find("_") + 1:comp.rfind("_")]
        method_composites[comp[:comp.index("_")]][site] = comp

    files = get_and_process_thumbs(impath, method_composites, goodUris)
    print(type(files))

    impath += "/"
    methods = {
        'random': MethodCompThums('random', impath, files["random"]),
        'temporalInterval': MethodCompThums('temporalInterval', impath, files["temporalInterval"]),
        'alSum': MethodCompThums('alSum', impath, files["alSum"]),
    }  # type: dict[str,MethodCompThums]

    out = {}  # type: dict[str,dict[str,CompositeColorResulst]]
    for mname, method in methods.items():
        print(mname, method)
        out[mname] = method.get_composite_dom_colors()

    try:
        # Objects json cannot encode natively are converted via to_jdic().
        serialized = json.dumps(out, indent=1, default=lambda x: x.to_jdic())
    except TypeError as e:
        print("Wow bad thing happened", e)
    else:
        with open("colorResults2.json", "w") as wout:
            wout.write(serialized)

    for k, v in out.items():
        print("+++++++++++++++++++++++++++++++++++++++++++++++++")
        print(k)
        for site, ret in v.items():
            print("site: ", site)
            for date, color in ret.results.items():
                print(date, ''.join(color))
def long():
    """Download the timemap of every original URI into ``tms2.json``.

    For each URI listed in ``origuris.txt`` (one per line, visited in sorted
    order) the JSON timemap is requested from
    ``http://web.archive.org/web/timemap/json/www.<uri>``. Responses that
    parse as JSON are echoed to stdout and collected; responses that do not
    parse are reported (body and headers) and skipped.

    The output file holds a single valid JSON object of the form
    ``{"tms": [[timemap], [timemap], ...]}`` - each timemap wrapped in its
    own one-element list - so that it can be read back with ``json.load``.
    The file is written only after all requests have completed.
    """
    with open("origuris.txt", "r") as read:
        origuris = [line.rstrip("\n") for line in read]

    useragent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:44.0) Gecko/20100101 Firefox/44.01'

    timemaps = []
    # The context manager closes the session even if a request raises.
    with requests.Session() as session:
        session.headers.update({'User-Agent': useragent})
        for it in sorted(origuris):
            request = session.get("http://web.archive.org/web/timemap/json/www.%s" % it)
            try:
                got = json.loads(request.text)
            except ValueError:
                # Not JSON: show what came back and move on to the next URI.
                print(request.text)
                print(request.headers)
                print("\n\n")
                continue
            print(json.dumps(got) + "\n")
            timemaps.append([got])

    with open("tms2.json", "w") as out:
        json.dump({"tms": timemaps}, out)
def generate():
    """Merge the per-site CSV results with timemap statistics into ``allTm2.csv``.

    Inputs, read in this order: ``tms2.json`` (an object with a ``tms``
    list), ``gooduris_20160225.txt``, ``temporalPairs.csv`` (one ``cs``
    record per site), ``compositeToComposite.csv``,
    ``alSumVSrandom_onetoone.csv``, ``alSumVStemporalInterval_onetoone.csv``
    and ``wins.csv``. Every CSV row is matched to its record through the
    ``site`` column; only ``wins.csv`` rows for unknown sites are skipped.

    For each unique composite site (as returned by ``gsite``) and each
    timemap key longer than two characters that occurs as a substring of
    that site, the timemap statistics are printed, stored on the site's
    record via ``setTMInfo`` and the record's two result strings are
    appended to ``allTm2.csv``.
    """
    composite_dir = "/home/john/wsdlims_ripped/ECIR2016TurkData/composites"  # args["cp"]

    with open("tms2.json", "r") as tm_file:
        tm_doc = json.load(tm_file)

    with open("gooduris_20160225.txt", "r") as uri_file:
        good_uris = [entry.rstrip("\n") for entry in uri_file]

    with open("temporalPairs.csv", "r") as src:
        per_site = {row['site']: cs(row) for row in csv.DictReader(src)}  # type: dict[str,cs]

    with open("compositeToComposite.csv", "r") as src:
        for row in csv.DictReader(src):
            random_sim = row['alsumRandomSim']
            temporal_sim = row['alsumTemporalSim']
            record = per_site[row['site']]
            record.ctcRsim = random_sim
            record.ctcTsim = temporal_sim

    # Both one-to-one files carry an 'average' column; only the target
    # attribute on the record differs.
    one_to_one_sources = (
        ("alSumVSrandom_onetoone.csv", "otoRsim"),
        ("alSumVStemporalInterval_onetoone.csv", "otoTsim"),
    )
    for csv_name, attr_name in one_to_one_sources:
        with open(csv_name, "r") as src:
            for row in csv.DictReader(src):
                average = row['average']
                setattr(per_site[row['site']], attr_name, average)

    with open("wins.csv", "r") as src:
        for row in csv.DictReader(src):
            record = per_site.get(row['site'], None)
            if record is None:
                continue
            record.won['r'] = row['awr']
            record.won['ti'] = row['awt']

    time_maps = {}  # type: dict[str,TM]
    for raw_tm in tm_doc['tms']:
        parsed = TM(raw_tm)
        time_maps[parsed.getURIKey()] = parsed
    usable_keys = [key for key in time_maps if len(key) > 2]

    def wanted_composite(fname):
        if "allTheSame" in fname:
            return False
        return check_if_goodURI(fname, good_uris) and "interval" not in fname

    composite_files = get_files(composite_dir, wanted_composite)
    print(composite_files)
    site_names = sorted({gsite(fname) for fname in composite_files})

    def tm_stats(tm):
        # Argument tuple shared by the debug print and setTMInfo below.
        return (tm.timeSpan(), tm.numMentos,
                tm.timeSpanAfter(2000), tm.numMementosAfter(2000),
                tm.timeSpanAfter(2005), tm.numMementosAfter(2005))

    # 640 641
    # self.site, self.alSum, self.random, self.aVr, self.temporal, self.aVt,
    # self.tmNumMementos, self.tmTimeSpan,
    # self.tmNumM2k, self.tmTimeSpan2k, self.tmNumM05k, self.tmTimeSpan05k
    # self.won['Random'],
    # self.won['TemporalInterval']
    with open("allTm2.csv", "w+") as out:
        out.write("site,ah,mh,mdif,nmemento,timespan,nummtwo,twotimespan,numof,timespanof,aWP,moto,mtcr,method\n")
        for site_name in site_names:
            for key in usable_keys:
                if key not in site_name:
                    continue
                tm = time_maps[key]
                print(*tm_stats(tm))
                record = per_site[key]
                record.setTMInfo(*tm_stats(tm))
                out.write(record.getRString())
                out.write(record.getTString())
                print("______________________________")