Beispiel #1
0
    def run(self,data):
        """Rewrite every case's ``target`` URL according to the URL's host.

        Each target is first passed through ``self.normaliz``; the host
        reported by ``get_url_site`` then selects one rewrite:

        * ``m.facebook.com`` / ``id-id.facebook.com``: for profile links that
          carry a ``refsrc`` parameter the decoded ``refsrc`` value becomes
          the URL, otherwise ``refsrc`` is removed; the host is then replaced
          with ``www.facebook.com``.
        * hosts listed in ``host_swaps``: only the host is replaced.
        * hosts ending in ``blogspot.com`` for which ``is_m1`` is true: the
          URL is passed through ``remove_m1``.
        * ``play.google.com``: the parameters in ``play_dropped_params`` are
          removed.

        Afterwards the target is looked up in
        ``self.settings["CONVERT_REPLACE"]`` and normalised once more.
        Cases are modified in place; nothing is returned.
        """
        # Hosts whose only rewrite is a straight host replacement.
        host_swaps = {
            "mobile.twitter.com": "twitter.com",
            "m.youtube.com": "www.youtube.com",
            "m.stafaband.info": "www.stafaband.info",
            "anjingkita.com": "www.anjingkita.com",
            "m.imdb.com": "www.imdb.com",
        }
        facebook_hosts = ("m.facebook.com", "id-id.facebook.com")
        # Query parameters removed from play.google.com URLs, in this order.
        play_dropped_params = ("referrer", "pcampaignid", "utm_term", "utm_medium", "hl")

        for case in data:
            url = self.normaliz(case.target)
            host = get_url_site(url)

            if host in facebook_hosts:
                if "profile.php?id=" in url and "refsrc=" in url:
                    # The refsrc value itself is the URL to keep: decode it
                    # and re-escape any spaces.
                    encoded = get_query(url, "refsrc")
                    decoded = urllib.unquote(encoded.replace("%3A", ":").replace("%2F", "/"))
                    url = decoded.replace(" ", "%20")
                else:
                    url = remove_query(url, "refsrc")
                case.add_common(url)
                case.target = replace_site(url, "www.facebook.com")
            elif host in host_swaps:
                case.add_common(url)
                case.target = replace_site(url, host_swaps[host])
            elif host.endswith("blogspot.com") and is_m1(url):
                case.add_common(url)
                case.target = remove_m1(url)
            elif host == "play.google.com":
                for param in play_dropped_params:
                    url = remove_query(url, param)
                # Unlike the other branches, add_common is not called here.
                case.target = url
            # NOTE: there is deliberately no m.olx.co.id branch; the original
            # one (stripping "redirect") was commented out.

            overrides = self.settings["CONVERT_REPLACE"]
            if case.target in overrides:
                case.target = overrides[case.target]

            case.target = self.normaliz(case.target)
Beispiel #2
0
    def run(self, data):
        """Normalise every case's ``target`` and apply a host-specific rewrite.

        For each case the target has a leading ``https://`` replaced with
        ``http://`` and everything from the first ``#`` onwards removed; the
        result is stored back on the case. Depending on the host reported by
        ``get_url_site`` the URL is then passed to ``case.add_common`` and
        the target's host is replaced:

        * ``m.facebook.com`` / ``id-id.facebook.com`` -> ``www.facebook.com``
          (after removing the ``refsrc`` query parameter)
        * hosts listed in ``host_swaps`` -> the mapped host

        Cases are modified in place; nothing is returned.
        """
        secure_prefix = "https://"
        facebook_hosts = ("m.facebook.com", "id-id.facebook.com")
        # Hosts whose only rewrite is a straight host replacement.
        host_swaps = {
            "mobile.twitter.com": "twitter.com",
            "m.youtube.com": "www.youtube.com",
        }

        for case in data:
            url = case.target
            if url.startswith(secure_prefix):
                url = "http://" + url[len(secure_prefix):]
            # Keep only the part before the first "#"; a URL without "#"
            # comes back unchanged.
            url = url.partition("#")[0]
            case.target = url

            host = get_url_site(url)
            if host in facebook_hosts:
                url = remove_query(url, "refsrc")
                case.add_common(url)
                case.target = replace_site(url, "www.facebook.com")
            elif host in host_swaps:
                case.add_common(url)
                case.target = replace_site(url, host_swaps[host])
Beispiel #3
0
 def run(self,data):
     """Mark cases as forbidden based on their host and their "forbid" data.

     Cases that are closed with conclusion "noProblem" or "robots" are
     skipped. A case whose host contains ".wapka.me" or ".wapka.mobi" is
     closed as "Forbidden" with reason "ip". Otherwise the case's "forbid"
     data decides: a truthy "forbidden" entry sets conclusion "Forbidden"
     with that entry as the reason, and a truthy "out" entry sets conclusion
     "Forbidden_nm", stores the entry as "additional" and closes the case.

     Cases are modified in place; the method returns None.
     """
     settled_conclusions = ("noProblem", "robots")
     wapka_markers = (".wapka.me", ".wapka.mobi")

     for case in data:
         already_settled = case.close and case.result.get("conclusion") in settled_conclusions
         if already_settled:
             continue

         host = get_url_site(case.target)
         if any(marker in host for marker in wapka_markers):
             case.set_result("conclusion", "Forbidden")
             case.set_result("reason", "ip")
             case.close = True
             continue

         forbid = case.get_data("forbid")
         if not forbid:
             continue

         blocked = forbid["forbidden"] if "forbidden" in forbid else None
         if blocked:
             # NOTE(review): this branch does not close the case, unlike the
             # two others - confirm that is intended.
             case.set_result("conclusion", "Forbidden")
             case.set_result("reason", blocked)

         # Checked independently of "forbidden": when both entries are set,
         # the "Forbidden_nm" conclusion is the one that remains.
         out = forbid["out"] if "out" in forbid else None
         if out:
             case.set_result("conclusion", "Forbidden_nm")
             case.set_result("additional", out)
             case.close = True
Beispiel #4
0
    def run(self,data):
        """Derive a conclusion for every open case from its link records.

        For each case that is not yet closed, the "linkbase", "l2patch" and
        "l2base" records are read with ``case.get_data`` and evaluated in
        this order (the first match closes the case):

        1. All three records exist and each has ``urlnew == "-"``:
           conclusion "notFound".
        2. linkbase ``urlnew == "CHK"``: conclusion "noProblem", "wiseEorr"
           or "weight<N>" depending on the record's ``weight`` and ``Wise``
           values (and ``self.is_pc`` for the case's site).
        3. linkbase ``urlnew == "GET"``: conclusion "crawlFail", "lowLevel",
           "unCrawl" or "del_reason=<value>".
        4. l2patch, then l2base, has a ``del_reason`` other than "-":
           conclusion "del_reason=<value>".

        Cases that match none of these are left untouched. Cases are
        modified in place; the method returns None.
        """

        def to_int(raw, fallback):
            # int() raises TypeError for None and ValueError for text that is
            # not a number (OverflowError for infinite floats). Only those
            # are treated as "no usable value"; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            try:
                return int(raw)
            except (TypeError, ValueError, OverflowError):
                return fallback

        for case in data:
            if case.close:
                continue
            ld = case.get_data("linkbase")
            l2patch = case.get_data("l2patch")
            l2base = case.get_data("l2base")
            site = get_url_site(case.target)

            if (ld and l2patch and l2base
                    and ld.get("urlnew") == "-"
                    and l2patch.get("urlnew") == "-"
                    and l2base.get("urlnew") == "-"):
                case.set_result("conclusion","notFound")
                case.set_result("owner","*****@*****.**")
                case.close = True
                continue

            if ld:
                urlnew = ld.get("urlnew")
                weight = to_int(ld.get("weight"), 0)
                wise = to_int(ld.get("Wise"), -1)

                if urlnew == "CHK":
                    reason = "wise=%d&&weight=%d"%(wise,weight)
                    if weight == 9 or wise > 0 or (weight > 10 and self.is_pc(site)):
                        case.set_result("conclusion","noProblem")
                        case.set_result("reason",reason)
                        case.close = True
                        case.ok = True
                    elif weight > 10:
                        case.set_result("conclusion","wiseEorr")
                        case.set_result("reason",reason)
                        case.close = True
                    else:
                        case.set_result("conclusion","weight%d"%weight)
                        case.set_result("reason",reason)
                        case.close = True
                    continue

                elif urlnew == "GET":
                    url_level = ld.get("url_level")
                    forceGET = ld.get("forceGET")
                    crawl_fail = ld.get("crawl_fail")
                    del_reason = ld.get("del_reason")
                    # Kept as an equality test on purpose: truthy values that
                    # do not compare equal to True (e.g. non-empty strings)
                    # must not count as a crawl failure.
                    if crawl_fail == True:
                        case.set_result("conclusion","crawlFail")
                        # %s instead of %d: the counters come from .get() and
                        # may be missing or stored as text, which made %d
                        # raise TypeError. Integers render identically.
                        case.set_result("reason","crawl_total:%s&&crawl_fail:%s"%(ld.get("craw_count"),ld.get("fail_count")))
                    elif url_level in ["1","0"]:
                        case.set_result("conclusion","lowLevel")
                        case.set_result("reason","url_level=%s"%url_level)
                    elif del_reason == "0":
                        case.set_result("reason","urlnew=GET&&url_level=%s&&forceGET=%s"%(url_level,forceGET))
                        case.set_result("conclusion","unCrawl")
                    else:
                        case.set_result("reason","del_reason=%s"%del_reason)
                        case.set_result("conclusion","del_reason=%s"%del_reason)
                    # Every GET outcome closes the case and ends its
                    # processing, so no later "is it closed?" check is needed.
                    case.close = True
                    continue

            if l2patch and "del_reason" in l2patch and l2patch["del_reason"] != "-":
                case.set_result("conclusion","del_reason="+l2patch["del_reason"])
                case.set_result("reason","del_reason="+l2patch["del_reason"])
                case.set_result("owner","*****@*****.**")
                case.close = True
                continue

            if l2base and "del_reason" in l2base and l2base["del_reason"] != "-":
                case.set_result("conclusion","del_reason="+l2base["del_reason"])
                # NOTE(review): unlike the l2patch branch, the reason is
                # stored without the "del_reason=" prefix - confirm intended.
                case.set_result("reason",l2base["del_reason"])
                case.set_result("owner","*****@*****.**")
                case.close = True