def run(self, data):
    """Rewrite the target URL of every case to its canonical host.

    Each case's target is passed through ``self.normaliz``, rewritten
    according to the site reported by ``get_url_site``, looked up in the
    ``CONVERT_REPLACE`` mapping from ``self.settings`` and finally passed
    through ``self.normaliz`` again. Whenever the host is swapped (or a
    blogspot URL goes through ``remove_m1``), the URL as it was just before
    that rewrite is registered on the case with ``add_common``.

    Args:
        data: iterable of case objects exposing ``target`` and
            ``add_common``.
    """
    # Sites that only need their host replaced.
    host_swaps = {
        "mobile.twitter.com": "twitter.com",
        "m.youtube.com": "www.youtube.com",
        "m.stafaband.info": "www.stafaband.info",
        "anjingkita.com": "www.anjingkita.com",
        "m.imdb.com": "www.imdb.com",
    }
    facebook_hosts = ("m.facebook.com", "id-id.facebook.com")
    # Query parameters stripped from play.google.com URLs, in this order.
    play_store_params = (
        "referrer",
        "pcampaignid",
        "utm_term",
        "utm_medium",
        "hl",
    )

    for case in data:
        url = self.normaliz(case.target)
        host = get_url_site(url)

        if host in facebook_hosts:
            if "profile.php?id=" in url and "refsrc=" in url:
                # The real destination is carried, URL-encoded, in the
                # ``refsrc`` query value; decode it and use it as the URL.
                encoded = get_query(url, "refsrc")
                decoded = urllib.unquote(
                    encoded.replace("%3A", ":").replace("%2F", "/")
                )
                url = decoded.replace(" ", "%20")
            else:
                url = remove_query(url, "refsrc")
            case.add_common(url)
            case.target = replace_site(url, "www.facebook.com")
        elif host in host_swaps:
            case.add_common(url)
            case.target = replace_site(url, host_swaps[host])
        elif host.endswith("blogspot.com") and is_m1(url):
            case.add_common(url)
            case.target = remove_m1(url)
        elif host == "play.google.com":
            for param in play_store_params:
                url = remove_query(url, param)
            # Unlike the other branches, add_common() is not called here.
            case.target = url
        # Handling for m.olx.co.id (stripping the "redirect" query) is
        # currently disabled.

        replacements = self.settings["CONVERT_REPLACE"]
        if case.target in replacements:
            case.target = replacements[case.target]
        case.target = self.normaliz(case.target)
def run(self, data):
    """Canonicalise the target URL of every case.

    The target is first downgraded from ``https://`` to ``http://`` and cut
    at the first ``#``; that cleaned URL is stored back on the case. If the
    site reported by ``get_url_site`` is one of the known mobile/alternate
    hosts, the cleaned URL is registered with ``add_common`` and the target
    is rewritten to the canonical host.

    Args:
        data: iterable of case objects exposing ``target`` and
            ``add_common``.
    """
    secure_prefix = "https://"
    facebook_hosts = ("m.facebook.com", "id-id.facebook.com")
    canonical_hosts = {
        "mobile.twitter.com": "twitter.com",
        "m.youtube.com": "www.youtube.com",
    }

    for case in data:
        url = case.target
        if url.startswith(secure_prefix):
            url = "http://" + url[len(secure_prefix):]
        # Everything from the first "#" onwards is dropped.
        url = url.partition("#")[0]
        case.target = url

        host = get_url_site(url)
        if host in facebook_hosts:
            url = remove_query(url, "refsrc")
            new_host = "www.facebook.com"
        elif host in canonical_hosts:
            new_host = canonical_hosts[host]
        else:
            # Unknown host: the cleaned URL stored above is the final target.
            continue

        case.add_common(url)
        case.target = replace_site(url, new_host)
def run(self,data):
    """Flag cases as forbidden, based on their site or their "forbid" data.

    For each case in ``data``:

    * cases already closed with conclusion "noProblem" or "robots" are
      skipped;
    * a site containing ".wapka.me" or ".wapka.mobi" is concluded as
      "Forbidden" with reason "ip" and the case is closed;
    * otherwise the record from ``case.get_data("forbid")`` is consulted: a
      truthy "forbidden" entry yields conclusion "Forbidden" (reason = that
      entry) and a truthy "out" entry yields conclusion "Forbidden_nm"
      (additional = that entry).

    NOTE(review): the original indentation of this method was lost, so the
    nesting of the last two statements is reconstructed; see the notes in
    the body and confirm against the intended behaviour.
    """
    for case in data:
        # Already settled as harmless / robots-blocked: nothing to do.
        if case.close and case.result.get("conclusion") in ["noProblem","robots"]:
            continue
        # Disabled robots pre-check, kept for reference:
        # robots = case.get_data("robots")
        # if not robots or robots.get('robots') != "DISALLOW":
        #     return
        site = get_url_site(case.target)
        # Sites under wapka.me / wapka.mobi are concluded as forbidden
        # without consulting the "forbid" record.
        if ".wapka.me" in site or ".wapka.mobi" in site:
            case.set_result("conclusion","Forbidden")
            case.set_result("reason","ip")
            case.close = True
            continue
        forbid = case.get_data("forbid")
        # No "forbid" record for this case: leave it untouched.
        if not forbid:
            continue
        if "forbidden" in forbid and forbid["forbidden"]:
            case.set_result("conclusion","Forbidden")
            case.set_result("reason",forbid["forbidden"])
        # When both entries are set, this conclusion overwrites the
        # "Forbidden" conclusion written just above.
        if "out" in forbid and forbid["out"]:
            case.set_result("conclusion","Forbidden_nm")
            case.set_result("additional",forbid["out"])
        # NOTE(review): placed at loop level, so every case with a non-empty
        # "forbid" record is closed even if neither entry was truthy. It may
        # originally have been nested inside one of the two ifs - confirm.
        case.close = True
    # NOTE(review): placed after the loop, where it is a no-op. If it was
    # originally inside the loop, processing would stop after the first case
    # that reaches this point - confirm.
    return
def run(self, data):
    """Classify still-open cases from their linkbase / l2patch / l2base data.

    For every case in ``data`` that is not already closed, the records
    returned by ``case.get_data`` for "linkbase", "l2patch" and "l2base" are
    checked in order. The first rule that matches writes a "conclusion"
    (plus "reason" / "owner" where applicable) and closes the case:

    * all three records present with ``urlnew == "-"``: "notFound";
    * linkbase ``urlnew == "CHK"``: "noProblem" (also sets ``case.ok``),
      "wiseEorr" or "weight<N>", depending on the weight / Wise values;
    * linkbase ``urlnew == "GET"``: "crawlFail", "lowLevel", "unCrawl" or
      "del_reason=<value>";
    * otherwise a ``del_reason`` other than "-" in l2patch, then in l2base:
      "del_reason=<value>".

    Cases matching none of the rules are left untouched.

    Args:
        data: iterable of case objects exposing ``close``, ``target``,
            ``get_data`` and ``set_result``.

    Returns:
        None. Results are written onto the case objects.
    """

    def to_int(value, default):
        # int() raises TypeError for None and ValueError for non-numeric
        # text. Only conversion failures fall back to the default, so
        # KeyboardInterrupt / SystemExit and unrelated errors still surface.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    for case in data:
        if case.close:
            continue

        ld = case.get_data("linkbase")
        l2patch = case.get_data("l2patch")
        l2base = case.get_data("l2base")
        site = get_url_site(case.target)

        # The URL is unknown to all three sources.
        if (ld and l2patch and l2base
                and ld.get("urlnew") == "-"
                and l2patch.get("urlnew") == "-"
                and l2base.get("urlnew") == "-"):
            case.set_result("conclusion", "notFound")
            case.set_result("owner", "*****@*****.**")
            case.close = True
            continue

        if ld:
            urlnew = ld.get("urlnew")
            weight = to_int(ld.get("weight"), 0)
            wise = to_int(ld.get("Wise"), -1)

            if urlnew == "CHK":
                reason = "wise=%d&&weight=%d" % (wise, weight)
                if weight == 9 or wise > 0 or (weight > 10 and self.is_pc(site)):
                    case.set_result("conclusion", "noProblem")
                    case.set_result("reason", reason)
                    case.close = True
                    case.ok = True
                elif weight > 10:
                    # NOTE(review): "wiseEorr" looks like a typo, but it is
                    # emitted at runtime and may be matched downstream, so
                    # it is kept verbatim.
                    case.set_result("conclusion", "wiseEorr")
                    case.set_result("reason", reason)
                    case.close = True
                else:
                    case.set_result("conclusion", "weight%d" % weight)
                    case.set_result("reason", reason)
                    case.close = True
                # Every CHK outcome closes the case.
                continue

            if urlnew == "GET":
                url_level = ld.get("url_level")
                force_get = ld.get("forceGET")
                del_reason = ld.get("del_reason")

                # Kept as an equality test on purpose: only True (or 1)
                # matches, not arbitrary truthy values such as strings.
                if ld.get("crawl_fail") == True:  # noqa: E712
                    case.set_result("conclusion", "crawlFail")
                    # NOTE(review): %d raises TypeError if either counter is
                    # missing or non-numeric; the key is spelled
                    # "craw_count" here - confirm against the data source.
                    case.set_result(
                        "reason",
                        "crawl_total:%d&&crawl_fail:%d"
                        % (ld.get("craw_count"), ld.get("fail_count")),
                    )
                elif url_level in ["1", "0"]:
                    case.set_result("conclusion", "lowLevel")
                    case.set_result("reason", "url_level=%s" % url_level)
                elif del_reason == "0":
                    case.set_result(
                        "reason",
                        "urlnew=GET&&url_level=%s&&forceGET=%s"
                        % (url_level, force_get),
                    )
                    case.set_result("conclusion", "unCrawl")
                else:
                    case.set_result("reason", "del_reason=%s" % del_reason)
                    case.set_result("conclusion", "del_reason=%s" % del_reason)
                # Every GET outcome closes the case.
                case.close = True
                continue

        # Fall back to the deletion reasons recorded in l2patch, then l2base.
        if l2patch and "del_reason" in l2patch and l2patch["del_reason"] != "-":
            case.set_result("conclusion", "del_reason=" + l2patch["del_reason"])
            case.set_result("reason", "del_reason=" + l2patch["del_reason"])
            case.set_result("owner", "*****@*****.**")
            case.close = True
            continue

        if l2base and "del_reason" in l2base and l2base["del_reason"] != "-":
            case.set_result("conclusion", "del_reason=" + l2base["del_reason"])
            # NOTE(review): unlike the l2patch rule, the reason here has no
            # "del_reason=" prefix; kept as-is since it is runtime output.
            case.set_result("reason", l2base["del_reason"])
            case.set_result("owner", "*****@*****.**")
            case.close = True