def buildDictionaryFromTwitterSearchTerm(self, term): t = twitterDicts.twitterSearch() print "How many tweets would you like to analyse?:(Default = 700) (Max = 700)" count = int(raw_input()) lex = LE.lexengine(t.searchByTerm(term, count), self.workingDirectory+"TwitterSearchTermDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryFromTwitterUsername(self, handle): t = twitterDicts.twitterSearch() lex = LE.lexengine( t.searchByUser(handle), self.workingDirectory + "TwitterHandleDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDumbWebscrape(self, rLevels): self.recursionLevels = rLevels self.crawl(rLevels) lex = LE.lexengine(self.crawledText, self.workingDirectory + "WebsiteDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryFromTwitterSearchTerm(self, term): t = twitterDicts.twitterSearch() print "How many tweets would you like to analyse?:(Default = 700) (Max = 700)" count = int(raw_input()) lex = LE.lexengine( t.searchByTerm(term, count), self.workingDirectory + "TwitterSearchTermDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryReddit(self, subredditList): print "[+] Beginning to Crawl Reddit\n" x = reddit.Reddit(subredditList) x.crawl() try: lex = LE.lexengine(x.rawText, self.workingDirectory+"RedditDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryReddit(self, subredditList): print "[+] Beginning to Crawl Reddit\n" x = reddit.Reddit(subredditList) x.crawl() try: lex = LE.lexengine(x.rawText, self.workingDirectory + "RedditDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryPdf(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc)-1] == '\'': fileLoc = fileLoc[1:len(fileLoc)-1] while(not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nPlease reenter the path.".format(fileLoc) fileLoc = raw_input() #try: x = subprocess.Popen(['ps2ascii', fileLoc], stdout=subprocess.PIPE) print "[-] Extracting text from pdf." lex = LE.lexengine(x.stdout.read(), self.workingDirectory+"PdfDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage()
def buildAggregate(self):
    """Merge every per-source dictionary present in the working directory
    into a single AggregateDictionary.txt.
    """
    lex = LE.lexengine("", self.workingDirectory + "AggregateDictionary.txt", False)
    # Collapsed five copy-pasted exists/append pairs into one loop (order
    # preserved) and dropped the unused `dirname` local.
    names = [
        "PdfDictionary.txt",
        "TxtDictionary.txt",
        "WebsiteDictionary.txt",
        "TwitterHandleDictionary.txt",
        "TwitterSearchTermDictionary.txt",
    ]
    currDicts = [self.workingDirectory + name
                 for name in names
                 if os.path.exists(self.workingDirectory + name)]
    lex.aggregateDict(currDicts)
def generateIndustryDictionary(industry): currInds = [] for dirname in returnDirs("data/industries/"+industry+'/'): if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"PdfDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"PdfDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"WebsiteDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"WebsiteDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TxtDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TxtDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TwitterSearchTermDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TwitterSearchTermDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TwitterHandleDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TwitterHandleDictionary.txt") lex = LE.lexengine("", "data/industries/"+industry+'/'+"IndustryDictionary.txt", False) print "[+] Beginning dictionary aggregation..." lex.aggregateDict(currInds) print ""
def buildDictionaryPdf(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc) - 1] == '\'': fileLoc = fileLoc[1:len(fileLoc) - 1] while (not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nPlease reenter the path.".format( fileLoc) fileLoc = raw_input() #try: x = subprocess.Popen(['ps2ascii', fileLoc], stdout=subprocess.PIPE) print "[-] Extracting text from pdf." lex = LE.lexengine(x.stdout.read(), self.workingDirectory + "PdfDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage()
def buildDictionaryText(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc)-1] == '\'': fileLoc = fileLoc[1:len(fileLoc)-1] while(not(os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nTry again.".format(fileLoc) fileLoc = raw_input() try: f = open(fileLoc) text = f.read() f.close() except: print "[-] Could not read file." try: lex = LE.lexengine(text, self.workingDirectory+"TxtDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryText(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc) - 1] == '\'': fileLoc = fileLoc[1:len(fileLoc) - 1] while (not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nTry again.".format( fileLoc) fileLoc = raw_input() try: f = open(fileLoc) text = f.read() f.close() except: print "[-] Could not read file." try: lex = LE.lexengine(text, self.workingDirectory + "TxtDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildAggregate(self):
    """Merge every per-source dictionary present in the working directory
    into a single AggregateDictionary.txt.
    """
    lex = LE.lexengine("", self.workingDirectory + "AggregateDictionary.txt", False)
    # Collapsed five copy-pasted exists/append pairs into one loop (order
    # preserved) and dropped the unused `dirname` local.
    names = [
        "PdfDictionary.txt",
        "TxtDictionary.txt",
        "WebsiteDictionary.txt",
        "TwitterHandleDictionary.txt",
        "TwitterSearchTermDictionary.txt",
    ]
    currDicts = [self.workingDirectory + name
                 for name in names
                 if os.path.exists(self.workingDirectory + name)]
    lex.aggregateDict(currDicts)
def buildDictionaryFromTwitterUsername(self, handle): t = twitterDicts.twitterSearch() lex = LE.lexengine(t.searchByUser(handle), self.workingDirectory+"TwitterHandleDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDumbWebscrape(self, rLevels): self.recursionLevels = rLevels self.crawl(rLevels) lex = LE.lexengine(self.crawledText, self.workingDirectory+"WebsiteDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()