def buildDictionaryFromTwitterSearchTerm(self, term): t = twitterDicts.twitterSearch() print "How many tweets would you like to analyse?:(Default = 700) (Max = 700)" count = int(raw_input()) lex = LE.lexengine(t.searchByTerm(term, count), self.workingDirectory+"TwitterSearchTermDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryFromTwitterUsername(self, handle): t = twitterDicts.twitterSearch() lex = LE.lexengine( t.searchByUser(handle), self.workingDirectory + "TwitterHandleDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDumbWebscrape(self, rLevels): self.recursionLevels = rLevels self.crawl(rLevels) lex = LE.lexengine(self.crawledText, self.workingDirectory + "WebsiteDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryFromTwitterSearchTerm(self, term): t = twitterDicts.twitterSearch() print "How many tweets would you like to analyse?:(Default = 700) (Max = 700)" count = int(raw_input()) lex = LE.lexengine( t.searchByTerm(term, count), self.workingDirectory + "TwitterSearchTermDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDictionaryReddit(self, subredditList): print "[+] Beginning to Crawl Reddit\n" x = reddit.Reddit(subredditList) x.crawl() try: lex = LE.lexengine(x.rawText, self.workingDirectory+"RedditDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryReddit(self, subredditList): print "[+] Beginning to Crawl Reddit\n" x = reddit.Reddit(subredditList) x.crawl() try: lex = LE.lexengine(x.rawText, self.workingDirectory + "RedditDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryPdf(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc)-1] == '\'': fileLoc = fileLoc[1:len(fileLoc)-1] while(not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nPlease reenter the path.".format(fileLoc) fileLoc = raw_input() #try: x = subprocess.Popen(['ps2ascii', fileLoc], stdout=subprocess.PIPE) print "[-] Extracting text from pdf." lex = LE.lexengine(x.stdout.read(), self.workingDirectory+"PdfDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage()
def buildAggregate(self):
    """Merge every per-source dictionary present in the working directory
    into a single AggregateDictionary.txt.
    """
    lex = LE.lexengine("", self.workingDirectory + "AggregateDictionary.txt", False)
    # Collapsed five copy-pasted exists/append pairs into one loop (order
    # preserved) and dropped the unused `dirname` local.
    names = [
        "PdfDictionary.txt",
        "TxtDictionary.txt",
        "WebsiteDictionary.txt",
        "TwitterHandleDictionary.txt",
        "TwitterSearchTermDictionary.txt",
    ]
    currDicts = [self.workingDirectory + name
                 for name in names
                 if os.path.exists(self.workingDirectory + name)]
    lex.aggregateDict(currDicts)
def generateIndustryDictionary(industry): currInds = [] for dirname in returnDirs("data/industries/"+industry+'/'): if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"PdfDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"PdfDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"WebsiteDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"WebsiteDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TxtDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TxtDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TwitterSearchTermDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TwitterSearchTermDictionary.txt") if os.path.exists("data/industries/"+industry+'/'+dirname+'/'+"TwitterHandleDictionary.txt"): currInds.append("data/industries/"+industry+'/'+dirname+'/'+"TwitterHandleDictionary.txt") lex = LE.lexengine("", "data/industries/"+industry+'/'+"IndustryDictionary.txt", False) print "[+] Beginning dictionary aggregation..." lex.aggregateDict(currInds) print ""
def buildDictionaryPdf(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc) - 1] == '\'': fileLoc = fileLoc[1:len(fileLoc) - 1] while (not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nPlease reenter the path.".format( fileLoc) fileLoc = raw_input() #try: x = subprocess.Popen(['ps2ascii', fileLoc], stdout=subprocess.PIPE) print "[-] Extracting text from pdf." lex = LE.lexengine(x.stdout.read(), self.workingDirectory + "PdfDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage()
def buildDictionaryText(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc)-1] == '\'': fileLoc = fileLoc[1:len(fileLoc)-1] while(not(os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nTry again.".format(fileLoc) fileLoc = raw_input() try: f = open(fileLoc) text = f.read() f.close() except: print "[-] Could not read file." try: lex = LE.lexengine(text, self.workingDirectory+"TxtDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildDictionaryText(self, fileLoc): fileLoc = fileLoc.strip() if fileLoc[0] == '\'' and fileLoc[len(fileLoc) - 1] == '\'': fileLoc = fileLoc[1:len(fileLoc) - 1] while (not (os.path.exists(fileLoc))): print "[-] The file location entered does not seem to exist:\n{0}.\nTry again.".format( fileLoc) fileLoc = raw_input() try: f = open(fileLoc) text = f.read() f.close() except: print "[-] Could not read file." try: lex = LE.lexengine(text, self.workingDirectory + "TxtDictionary.txt") print "[+] Beginning analysis..." lex.trimPercentage() except: print "[-] Could not process file."
def buildAggregate(self):
    """Merge every per-source dictionary present in the working directory
    into a single AggregateDictionary.txt.
    """
    lex = LE.lexengine("", self.workingDirectory + "AggregateDictionary.txt", False)
    # Collapsed five copy-pasted exists/append pairs into one loop (order
    # preserved) and dropped the unused `dirname` local.
    names = [
        "PdfDictionary.txt",
        "TxtDictionary.txt",
        "WebsiteDictionary.txt",
        "TwitterHandleDictionary.txt",
        "TwitterSearchTermDictionary.txt",
    ]
    currDicts = [self.workingDirectory + name
                 for name in names
                 if os.path.exists(self.workingDirectory + name)]
    lex.aggregateDict(currDicts)
def buildDictionaryFromTwitterUsername(self, handle): t = twitterDicts.twitterSearch() lex = LE.lexengine(t.searchByUser(handle), self.workingDirectory+"TwitterHandleDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()
def buildDumbWebscrape(self, rLevels): self.recursionLevels = rLevels self.crawl(rLevels) lex = LE.lexengine(self.crawledText, self.workingDirectory+"WebsiteDictionary.txt") print "[+] Beginning trim..." lex.trimPercentage()