def setUp(self):
    """Populate the fixture attributes used by the tests.

    Stores a sample search URL, the test-data directory, a fixed
    transaction id, and freshly constructed validator, scraper, scorer
    and category-setter objects on ``self``.
    """
    self._url = 'https://www.google.co.jp/search?q=ruby'
    self._testdatadir = "./tests/data/"
    self._transaction_id = "transaction_test"
    self._validator = CategoryValidator(self._url, self._transaction_id)
    self._scrapyer = GoogSearchScrapyer()
    self._scoreler = StdScoreler()
    self._relatedurls = ["https://facebook.com", "https://instagram.com"]

    # Category-setter executable and the files it is given.
    # NOTE(review): "ls" looks like a stand-in executable for the tests --
    # confirm against CategorySetterExe.
    exepath = "ls"
    file_stem = self._testdatadir + self._transaction_id + "1"
    self._infile = file_stem + ".scraped"
    self._outfile = file_stem + ".categorized"
    self._categorysetter = CategorySetterExe(exepath, self._infile, self._outfile)


def rm_quoat(val):
    """Return *val* with every double-quote character removed."""
    return re.sub(r'\"', '', val)
# Validate every URL listed in self._no_cat_url_list, one per line.
# i names the per-URL temp files and is advanced only after a URL has been
# processed without raising.
i = 0
with open(self._no_cat_url_list) as fo:
    # NOTE(review): jstream is opened on result_json but nothing is written
    # through it here; results go through writer.output() instead -- confirm
    # that both are meant to target result_json.
    with JSONStreamWriter.ArrayWriter(result_json) as jstream:
        for url in fo:
            # Skip blank lines in the URL list.
            if not url.strip():
                continue
            # NOTE(review): url still carries its line terminator when it is
            # handed to CategoryValidator -- confirm that is intended.
            transaction_id = str(time.time()).replace(".", "_")
            infile = outfile = None
            try:
                # Set up the category setter with this URL's temp files.
                file_stem = tmpdir + "/" + self._transaction_id + str(i + 1)
                infile = file_stem + ".scraped"
                outfile = file_stem + ".categorized"
                categorysetter = CategorySetterExe(exepath, infile, outfile)
                # Validate the URL.
                wkValidator = CategoryValidator(url, tmpdir + "/" + transaction_id + str(i))
                category = wkValidator.do(scraper, scoreler, categorysetter)
                # Output the result to a JSON-formatted file.
                writer.output(result_json, wkValidator)
                # TODO: streaming output through jstream is not wired up yet.
                #evt = fmter.Urls2Json(url,category,wkValidator.getDetail(),transaction_id)
                #jstream.write(evt)
                i += 1
            except Exception as e:
                # Best effort: report the failure and move on to the next URL.
                print(e)
            finally:
                # Delete the .scraped and .categorized temp files even when
                # validation failed: i is not advanced on failure, so the
                # next URL reuses these names and must not find stale files.
                for tmp_path in (infile, outfile):
                    if tmp_path and os.path.exists(tmp_path):
                        try:
                            os.remove(tmp_path)
                        except OSError as e:
                            print(e)