예제 #1
0
    def getCoOccurrencesFromDB(self, stopWordList, tokens, textCategory):
        """Collect co-occurring tokens for the given tokens from the database.

        For every entry of ``tokens`` the first ``token_by_category`` row
        matching the token and ``textCategory`` is looked up.  The ``token``
        values of the ten ``co_occurrence`` rows with the highest ``count``
        that reference this row's id are then gathered.

        stopWordList -- kept for interface compatibility; not used here.
        tokens       -- iterable of token strings to look up.
        textCategory -- category value the token rows have to match.

        Returns a set with the collected ``token`` values.  Tokens without a
        matching database row contribute nothing.
        """
        keywords = set()

        for token in tokens:
            # Let the connection quote the values instead of pasting them
            # between quote characters: a token such as "don't" would
            # otherwise break (or change the meaning of) the statement.
            tokenClause = (
                "token_by_category.token=%s AND token_by_category.category=%s"
                % (token_by_category.sqlrepr(token),
                   token_by_category.sqlrepr(textCategory)))
            tokenEntities = list(token_by_category.select(tokenClause))

            # An explicit emptiness check replaces the former catch-all
            # "except IndexError", which could also hide unrelated errors.
            if not tokenEntities:
                continue
            tokenID = str(tokenEntities[0].id)

            # Ten rows with the highest count.  The result set is sliced
            # before it is iterated so that not every row has to be loaded.
            coOccurrenceEntities = co_occurrence.select(
                "co_occurrence.co_occurs_with=%s"
                % co_occurrence.sqlrepr(tokenID),
                orderBy=co_occurrence.q.count).reversed()[:10]

            keywords.update(entity.token for entity in coOccurrenceEntities)

        return keywords
예제 #2
0
    def getCoOccurrencesFromDB(self, stopWordList, tokens, textCategory):
        """Return the set of tokens co-occurring with the given tokens.

        Each entry of ``tokens`` is resolved to the first
        ``token_by_category`` row with that token and ``textCategory``.  From
        the ``co_occurrence`` rows referencing that row's id, the ten with
        the highest ``count`` are taken and their ``token`` values collected.

        stopWordList -- unused; retained so existing callers keep working.
        tokens       -- iterable of token strings.
        textCategory -- category the token rows must belong to.

        Returns a set of ``token`` values (empty when nothing was found).
        """
        keywords = set()

        for token in tokens:
            # Values are quoted by the connection rather than concatenated
            # into the SQL text, so quote characters inside a token cannot
            # corrupt or alter the query.
            tokenEntities = list(
                token_by_category.select(
                    "token_by_category.token=" +
                    token_by_category.sqlrepr(token) +
                    " AND token_by_category.category=" +
                    token_by_category.sqlrepr(textCategory)))

            # Skip unknown tokens explicitly instead of swallowing every
            # IndexError raised anywhere in the loop body.
            if not tokenEntities:
                continue
            tokenID = str(tokenEntities[0].id)

            # Highest counts first, limited to ten rows; slicing the result
            # set before iterating avoids loading every matching row.
            topCoOccurrences = co_occurrence.select(
                "co_occurrence.co_occurs_with=" +
                co_occurrence.sqlrepr(tokenID),
                orderBy=co_occurrence.q.count).reversed()[:10]

            for coOccurrenceEntity in topCoOccurrences:
                keywords.add(coOccurrenceEntity.token)

        return keywords
예제 #3
0
from model import token_by_category, co_occurrence

# Prints the tokens of the five highest-count co-occurrence rows that
# reference the 'court' token of the chosen text category.

# text category
textCategory = 'a'

# initialise co-occurrence entity list (stays empty if the token is unknown)
coOccurrenceEntities = []

# get token information from database; select() yields a result set, not a
# single row, so it is materialised and the first match is picked below
tokenEntities = list(token_by_category.select(
    "token_by_category.token='court' AND token_by_category.category=" +
    token_by_category.sqlrepr(textCategory)))

if tokenEntities:
    tokenEntity = tokenEntities[0]

    # get five most frequent co-occurrences from database; the id is
    # converted with str() because it cannot be concatenated to a string
    # directly, and the result set is sliced so only five rows are loaded
    coOccurrenceEntities = list(co_occurrence.select(
        "co_occurrence.co_occurs_with='" + str(tokenEntity.id) + "'",
        orderBy=co_occurrence.q.count).reversed()[:5])

# print the token of each co-occurrence; the parenthesised single-argument
# form behaves the same as a print statement and as the print() function
for coOccurrenceEntity in coOccurrenceEntities:
    print(coOccurrenceEntity.token)
예제 #4
0
from model import token_by_category, co_occurrence

# Looks up the 'court' token for one text category and prints the tokens of
# the five co-occurrence rows with the highest count that reference it.

# text category
textCategory = 'a'

# initialise co-occurrence entity list (left empty when no token row exists)
coOccurrenceEntities = []

# get token information from database; select() returns a result set rather
# than one row, so the rows are listed and the first one is used
tokenClause = ("token_by_category.token='court' AND token_by_category.category=%s"
               % token_by_category.sqlrepr(textCategory))
tokenEntities = list(token_by_category.select(tokenClause))

if tokenEntities:
    tokenEntity = tokenEntities[0]

    # get five most frequent co-occurrences from database; str() is needed
    # to splice the id into the clause, and slicing the result set keeps the
    # query from loading more than five rows
    coOccurrenceClause = "co_occurrence.co_occurs_with='%s'" % str(tokenEntity.id)
    coOccurrenceEntities = list(
        co_occurrence.select(coOccurrenceClause,
                             orderBy=co_occurrence.q.count).reversed()[:5])

# print the token of each co-occurrence; print(x) with a single argument
# works both as a print statement and as a call to the print() function
for coOccurrenceEntity in coOccurrenceEntities:
    print(coOccurrenceEntity.token)