import sys

from IoHelper import IoHelper
from WebHelper import WebHelper
from NltkHelper import NltkHelper

# NOTE(review): `Helpers` is referenced below but no import for it is visible
# in this part of the file -- confirm it is imported from the right module.

# Command-line usage: <script> URL [COUNT]
DEFAULT_COUNT = 10

# The URL is a required first argument; fail with a usage hint instead of an
# unhandled IndexError when it is missing.
if len(sys.argv) < 2:
    print("Usage: {} URL [COUNT]".format(sys.argv[0]))
    sys.exit(1)

# sys.argv entries are already strings, so no conversion is needed.
url = sys.argv[1]

# The count is an optional second argument; fall back to the default when it
# is absent (IndexError) or not an integer (ValueError).
try:
    count = int(sys.argv[2])
except (IndexError, ValueError):
    count = DEFAULT_COUNT

if not Helpers.urlValidator(url):
    print ("URL entered is not valid")
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)

webPage = WebHelper(url)

# Parse the readable text out of the HTML page and lower-case it.
txt = webPage.text_from_html().lower()

# Optional step: write the text to a file as a temporary backup for
# troubleshooting.
io = IoHelper("output.txt")
io.fileWriter(txt)

# Text cleansing starts here: tokenize the extracted text.
tokenizedTxt = NltkHelper.txtTokenizer("txt",txt)