Example #1
0
    def posNGram(self,
                 sentence,
                 n=2):  # sentence: one tokenized sentence, e.g. sentenceTokenize[1]
        '''
            Count the part-of-speech n-grams of a tokenized sentence.

            Each contiguous window of n tokens is handed to posTag on its own,
            and the second item of every pair posTag returns is collected, in
            order, into a tuple that serves as the n-gram key.

            Tag reference (presumably the Penn Treebank tag set -- confirm
            against posTag): https://cs.nyu.edu/grishman/jet/guide/PennPOS.html

            Returns a dict mapping each tag tuple to the number of windows that
            produced it; the dict is empty when the sentence holds fewer than
            n tokens.
        '''
        counts = {}
        # Odd and even n cover the same windows: start offsets 0 .. len - n.
        for start in range(len(sentence) - n + 1):
            tagged = posTag(sentence[start:start + n])
            key = tuple(pair[1] for pair in tagged)
            counts[key] = counts.get(key, 0) + 1
        return counts
Example #2
0
	def grabMySQLdocument(self, description):
		"""
		Load the document stored under ``description`` and POS-tag its words.

		The first column of the first matching row of ``Documents`` is read,
		every underscore in it is replaced by an apostrophe (presumably
		apostrophes were stored as underscores -- confirm against the code
		that writes the table), and the text is passed through wordTokenize
		and then posTag.

		Args:
			description: value matched against the ``description`` column.

		Returns:
			Whatever posTag returns for the tokenized document text.

		Raises:
			IndexError: if the query yields no rows.
		"""
		# The value is bound by the driver instead of being pasted into the
		# statement, so quotes inside description cannot change the query.
		sql = "SELECT document FROM Documents WHERE description = %s"
		db = MySQLdb.connect("localhost","dondi","","nlpText" )
		try:
			cursor = db.cursor()
			cursor.execute(sql, (description,))
			result = cursor.fetchall()
			db.commit()
		finally:
			# Release the connection even when the query itself fails.
			db.close()
		result = result[0][0].replace('_', "'")
		words = wordTokenize(result)
		words = posTag(words)
		return words