Ejemplo n.º 1
0
 def run(self):
     """Download the HHS OIG fugitive profiles page and parse it.

     The page is fetched through the opener supplied by ``GetPage`` and then
     handed to ``Parser.parsePage`` together with a map of category names to
     word-list files.  Any exception raised while building the parser or
     parsing the page is reported through ``self.__err.crawlerFail`` (with the
     extracted traceback) instead of being propagated.
     """
     # The instance is not used below; construction is kept in case it has
     # side effects -- TODO confirm and drop if it does not.
     p=Psyco(self.__fpath)
     crawler=GetPage(self.__proxy)
     opener=crawler.getOpener()

     # NOTE(review): the download happens outside the try block, so a failed
     # request propagates to the caller rather than reaching crawlerFail.
     # The third positional argument is presumably a timeout in seconds --
     # confirm against the opener returned by GetPage.getOpener().
     html=opener.open("https://oig.hhs.gov/fraud/fugitives/profiles.asp#allonce",None,120).read()
     try:
         # Category name -> path of the text file associated with it.
         cats={
             "charges":"/home/aevans/Documents/cats/finance/charges.txt",
             "attacks":"/home/aevans/Documents/cats/finance/attacks.txt",
             # NOTE(review): key was spelled "convicitons"; confirm nothing
             # downstream relies on the old spelling.
             "convictions":"/home/aevans/Documents/cats/finance/convictions.txt",
             "family":"/home/aevans/Documents/cats/finance/family.txt",
             # NOTE(review): this path points into terror/ while the other
             # entries use finance/ -- confirm this is intentional.
             "finance":"/home/aevans/Documents/cats/terror/finance.txt"
         }

         parser=Parser(cats)
         parser.parsePage(html,0)
     except Exception as e:
         # sys.exc_info()[2] is the traceback of the exception being handled.
         self.__err.crawlerFail(e,traceback.extract_tb(sys.exc_info()[2]), True)
Ejemplo n.º 2
0
 def run(self):
     """Crawl the Australian listed-terrorist-organisations pages.

     The index page is fetched through the opener supplied by ``GetPage`` and
     passed to ``crawler.loop`` as the starting HTML, together with the link
     filter, the parser configuration and the ``Psyco`` database handle.
     After ``crawler.loop`` returns, a ``'finished'`` row is inserted into
     ``au_parse_test.terms``.  Any exception raised inside the try block is
     reported through ``self.__err.crawlerFail`` (with the extracted
     traceback); in that case the ``'finished'`` row is not written.
     """
     p=Psyco(self.__fpath)
     crawler=GetPage(self.__proxy)
     opener=crawler.getOpener()

     # NOTE(review): the download happens outside the try block, so a failed
     # request propagates to the caller rather than reaching crawlerFail.
     # The third positional argument is presumably a timeout in seconds --
     # confirm against the opener returned by GetPage.getOpener().
     html=opener.open("http://www.nationalsecurity.gov.au/Listedterroristorganisations/Pages/default.aspx",None,120).read()
     try:
         spynMap={
         }

         confMap={
             "fpath":self.__fpath
         }

         # Category name -> path of the text file associated with it.
         cats={
             "targets":"/home/aevans/Documents/cats/terror/targets.txt",
             "activities":"/home/aevans/Documents/cats/terror/activities.txt",
             "attacks":"/home/aevans/Documents/cats/terror/attacks.txt",
             "finance":"/home/aevans/Documents/cats/terror/finance.txt",
             "charges":"/home/aevans/Documents/cats/terror/charges.txt",
             "convictions":"/home/aevans/Documents/cats/terror/convictions.txt",
             "risk":"/home/aevans/Documents/cats/terror/risk.txt",
             "leadership":"/home/aevans/Documents/cats/terror/leadership.txt",
             "background":"/home/aevans/Documents/cats/terror/background.txt",
             "disclaimer":"/home/aevans/Documents/cats/terror/disclaimer.txt",
             "family":"/home/aevans/Documents/cats/terror/family.txt",
             "noninfo":"/home/aevans/Documents/cats/terror/noninfo.txt",
             # NOTE(review): file name is "nrecruitment.txt" -- confirm it is
             # not a typo for "recruitment.txt".
             "recruitment":"/home/aevans/Documents/cats/terror/nrecruitment.txt"
         }

         parseMap=[{
            "class":Parser(cats),
            "passSpynner":False
         }]

         # Raw string with escaped dots so that "." only matches a literal
         # dot in the host name; "/" needs no escaping in a Python regex.
         hrefFilter=re.compile(r"www\.nationalsecurity\.gov\.au/Listedterroristorganisations/")

         pages,faillist=crawler.loop(
             html,
             linktag="a",
             linkattr="href",
             linklimiters={"href":hrefFilter},
             pageId=self.__pid,
             maxproxies=self.__maxproxies,
             spynnerMap=spynMap,
             opener=opener,
             waitload=120,
             proxy=self.__proxy,
             hashName="hash",
             table="au_parse_test.html",
             test=False,
             printTest=False,
             faillist=[],
             database=p,
             cfpMap=confMap,
             parserClassMap=parseMap,
             commitSize=100,
             checkLinks=True
         )
         # Completion marker; only reached when crawler.loop did not raise.
         p.execute("INSERT INTO au_parse_test.terms VALUES('finished')")
     except Exception as e:
         # sys.exc_info()[2] is the traceback of the exception being handled.
         self.__err.crawlerFail(e,traceback.extract_tb(sys.exc_info()[2]), True)