Esempio n. 1
0
def contentDensity(pylinguistObj):
    """Return the ratio of content words to all other tagged tokens.

    A token counts as a content word when its POS tag is ``VERB``,
    ``NOUN``, ``ADJ`` or ``ADV``; every other entry of ``postag`` is
    counted as a function word.

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. When that list is empty it is filled in
            place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``content_words / function_words`` as a float, or ``0`` when there
        are no function words (which would otherwise divide by zero).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    content_words = sum(
        1 for tag in pylinguistObj.postag if tag[1] in content_tags
    )
    # Everything that is not a content word is treated as a function word.
    function_words = float(len(pylinguistObj.postag) - content_words)

    return content_words / function_words if function_words else 0
Esempio n. 2
0
def adpPronRatio(pylinguistObj):
    """Return the pronoun incidence divided by the adposition incidence.

    Pronouns are tokens tagged ``PRON``, ``PRO-KS`` or ``PRO-KS-REL``;
    adpositions are tokens tagged ``ADP``. Both incidences are counts per
    1000 entries of ``postag``.

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. When that list is empty it is filled in
            place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The ratio as a float, or ``0`` when there are no tagged tokens or
        no adpositions (the cases that would divide by zero).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    n_pron = 0
    n_adp = 0
    # Single pass: the pronoun tags and "ADP" are mutually exclusive.
    for tag in pylinguistObj.postag:
        word_class = tag[1]
        if word_class in pronoun_tags:
            n_pron += 1
        elif word_class == "ADP":
            n_adp += 1

    per_thousand = float(len(pylinguistObj.postag)) / 1000
    # Explicit guards replace the bare ``except:`` blocks, which only ever
    # protected against these two zero denominators.
    if not per_thousand or not n_adp:
        return 0

    pron_incidence = n_pron / per_thousand
    adp_incidence = n_adp / per_thousand
    return pron_incidence / float(adp_incidence)
Esempio n. 3
0
def adpPronRatio(pylinguistObj):
    """Compute pronoun incidence over adposition incidence.

    Tokens tagged ``PRON``, ``PRO-KS`` or ``PRO-KS-REL`` count as
    pronouns and tokens tagged ``ADP`` count as adpositions. Each
    incidence is the count per 1000 entries of ``postag``.

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. An empty list is replaced in place by
            ``tools.getPosTag(pylinguistObj)`` before counting.

    Returns:
        The pronoun/adposition incidence ratio as a float, or ``0`` when
        ``postag`` is empty or contains no adposition.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    word_classes = [tag[1] for tag in pylinguistObj.postag]
    n_pron = sum(
        1 for cls in word_classes if cls in ("PRON", "PRO-KS", "PRO-KS-REL")
    )
    n_adp = sum(1 for cls in word_classes if cls == "ADP")

    # Both zero-denominator cases are handled up front instead of being
    # swallowed by a bare ``except:``.
    if not word_classes or not n_adp:
        return 0

    scale = float(len(word_classes)) / 1000
    return (n_pron / scale) / float(n_adp / scale)
Esempio n. 4
0
def calcAmbiquity(pylinguistObj):
    """Return the average number of synsets per content word.

    A token is a content word when its tag is one of the verb (``VB*``),
    noun (``NN*``), adjective (``JJ*``) or adverb (``RB``, ``RBR``,
    ``RBS``) tags listed below. For each such token the number of entries
    returned by ``wn.synsets(word)`` is accumulated.
    NOTE(review): ``wn`` is presumably NLTK's WordNet corpus reader —
    confirm against the module imports.

    Tag meanings:
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. When that list is empty it is filled in
            place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        Total synset count divided by the number of content words, as a
        float, or ``0`` when no content word is present.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = (
        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",
        "NN", "NNS", "NNP", "NNPS",
        "JJ", "JJR", "JJS",
        "RB", "RBR", "RBS",
    )

    # Only the overall average is returned, so one pair of totals is
    # enough; the per-class averages were never used.
    n_content = 0
    n_senses = 0
    for tag in pylinguistObj.postag:
        if tag[1] in content_tags:
            n_content += 1
            n_senses += len(wn.synsets(tag[0]))

    if n_content > 0:
        return n_senses / float(n_content)
    return 0
Esempio n. 5
0
def calcAmbiquity(pylinguistObj):
    """Compute the mean synset count over verbs, nouns, adjectives, adverbs.

    Every token whose tag is in the verb, noun, adjective or adverb tag
    lists below contributes ``len(wn.synsets(word))`` to the total.
    NOTE(review): ``wn`` is presumably NLTK's WordNet corpus reader —
    confirm against the module imports.

    Tag meanings:
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. An empty list is replaced in place by
            ``tools.getPosTag(pylinguistObj)`` before counting.

    Returns:
        The mean synset count as a float, or ``0`` when no token carries
        one of the listed tags.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    verb_tags = ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")
    noun_tags = ("NN", "NNS", "NNP", "NNPS")
    adjective_tags = ("JJ", "JJR", "JJS")
    adverb_tags = ("RB", "RBR", "RBS")
    ambiguous_tags = verb_tags + noun_tags + adjective_tags + adverb_tags

    # One synset count per content word; the unused per-class averages of
    # the earlier implementation are gone.
    sense_counts = [
        len(wn.synsets(tag[0]))
        for tag in pylinguistObj.postag
        if tag[1] in ambiguous_tags
    ]

    if not sense_counts:
        return 0
    return sum(sense_counts) / float(len(sense_counts))
Esempio n. 6
0
def nounIncidence(pylinguistObj):
    """Return the number of ``NOUN``-tagged tokens per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``n_noun / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_noun = sum(1 for tag in pylinguistObj.postag if tag[1] == "NOUN")

    # Best-effort like the original, but without a bare ``except:`` that
    # would also swallow KeyboardInterrupt and SystemExit.
    try:
        return n_noun / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 7
0
def nounIncidence(pylinguistObj):
    """Compute how many tokens tagged ``NOUN`` occur per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The noun count divided by ``word_count / 1000`` as a float, or
        ``0`` when that division cannot be carried out (zero, missing or
        non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    noun_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] == "NOUN":
            noun_total += 1

    # Only the failure modes of the division itself are handled; a bare
    # ``except:`` would also hide interpreter-exit signals.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = noun_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 8
0
def verbIncidence(pylinguistObj):
    """Return the number of ``VERB``-tagged tokens per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``n_verb / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_verb = sum(1 for tag in pylinguistObj.postag if tag[1] == "VERB")

    # Named exceptions keep the best-effort fallback without catching
    # KeyboardInterrupt/SystemExit the way a bare ``except:`` does.
    try:
        return n_verb / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 9
0
def functionalIncidence(pylinguistObj):
    """Return the number of function-word tokens per 1000 words.

    A token counts as a function word when its tag is ``DET``, ``ADP``,
    ``PRON``, ``CONJ`` or ``IN``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``count / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    functional_tags = ("DET", "ADP", "PRON", "CONJ", "IN")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in functional_tags)

    # Best-effort fallback limited to the errors the division can raise,
    # instead of a bare ``except:``.
    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 10
0
def contentIncidence(pylinguistObj):
    """Return the number of content-word tokens per 1000 words.

    A token counts as a content word when its tag is ``VERB``, ``NOUN``,
    ``ADJ`` or ``ADV``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``count / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in content_tags)

    # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit;
    # only the division's own failure modes fall back to 0.
    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 11
0
def pronIncidence(pylinguistObj):
    """Return the number of pronoun tokens per 1000 tagged tokens.

    A token counts as a pronoun when its tag is ``PRON``, ``PRO-KS`` or
    ``PRO-KS-REL``. Unlike the ``word_count``-based incidences, the
    denominator here is ``len(postag)``.

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. When that list is empty it is filled in
            place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``n_pron / (len(postag) / 1000)`` as a float, or ``0`` when
        ``postag`` is still empty after tagging.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    n_pron = sum(1 for tag in pylinguistObj.postag if tag[1] in pronoun_tags)

    total = len(pylinguistObj.postag)
    # The bare ``except:`` only ever guarded this zero denominator.
    if not total:
        return 0
    return n_pron / (float(total) / 1000)
Esempio n. 12
0
def pronIncidence(pylinguistObj):
    """Compute pronoun occurrences per 1000 entries of ``postag``.

    Tokens tagged ``PRON``, ``PRO-KS`` or ``PRO-KS-REL`` are counted as
    pronouns. The denominator is ``len(postag)``, not ``word_count``.

    Args:
        pylinguistObj: Object exposing ``postag``, a list of
            ``(word, tag)`` pairs. An empty list is replaced in place by
            ``tools.getPosTag(pylinguistObj)`` before counting.

    Returns:
        The pronoun incidence as a float, or ``0`` when there are no
        tagged tokens to divide by.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    pronoun_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in ("PRON", "PRO-KS", "PRO-KS-REL"):
            pronoun_total += 1

    # Explicit empty-input guard instead of a bare ``except:`` around the
    # division.
    if len(pylinguistObj.postag) == 0:
        return 0

    thousands_of_tokens = float(len(pylinguistObj.postag)) / 1000
    return pronoun_total / thousands_of_tokens
Esempio n. 13
0
def advIncidence(pylinguistObj):
    """Return the number of adverb tokens per 1000 words.

    A token counts as an adverb when its tag is ``ADV`` or ``ADV-KS``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``n_adv / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverb_tags = ("ADV", "ADV-KS")
    n_adv = sum(1 for tag in pylinguistObj.postag if tag[1] in adverb_tags)

    # Keep the fall-back-to-zero behaviour, but name the exceptions so
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        return n_adv / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 14
0
def contentIncidence(pylinguistObj):
    """Compute how many content-word tokens occur per 1000 words.

    Tokens tagged ``VERB``, ``NOUN``, ``ADJ`` or ``ADV`` are counted.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The content-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when that division cannot be carried out (zero,
        missing or non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in ("VERB", "NOUN", "ADJ", "ADV"):
            content_total += 1

    # Handle only what the division can raise; a bare ``except:`` would
    # also hide interpreter-exit signals.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = content_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 15
0
def advIncidence(pylinguistObj):
    """Compute how many adverb tokens occur per 1000 words.

    Tokens tagged ``ADV`` or ``ADV-KS`` are counted as adverbs.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The adverb count divided by ``word_count / 1000`` as a float, or
        ``0`` when that division cannot be carried out (zero, missing or
        non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverb_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in ("ADV", "ADV-KS"):
            adverb_total += 1

    # Named exceptions replace the bare ``except:`` so that
    # KeyboardInterrupt/SystemExit propagate.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = adverb_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 16
0
def verbIncidence(pylinguistObj):
    """Compute how many tokens tagged ``VERB`` occur per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The verb count divided by ``word_count / 1000`` as a float, or
        ``0`` when that division cannot be carried out (zero, missing or
        non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    verb_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] == "VERB":
            verb_total += 1

    # The fallback stays best-effort but no longer uses a bare
    # ``except:``, which would also trap KeyboardInterrupt/SystemExit.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = verb_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 17
0
def functionalIncidence(pylinguistObj):
    """Compute how many function-word tokens occur per 1000 words.

    Tokens tagged ``DET``, ``ADP``, ``PRON``, ``CONJ`` or ``IN`` are
    counted as function words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The function-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when that division cannot be carried out (zero,
        missing or non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    functional_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in ("DET", "ADP", "PRON", "CONJ", "IN"):
            functional_total += 1

    # Catch only the division's failure modes rather than everything a
    # bare ``except:`` would swallow.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = functional_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 18
0
def adjectiveIncidence(pylinguistObj):
    """Return the number of ``ADJ``-tagged tokens per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. An empty ``postag``
            is filled in place with ``tools.getPosTag(pylinguistObj)``.

    Returns:
        ``n_adjective / (word_count / 1000)`` as a float, or ``0`` when
        ``word_count`` is zero, missing or not convertible to a float.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_adjective = sum(1 for tag in pylinguistObj.postag if tag[1] == "ADJ")

    # Best-effort like the original, minus the bare ``except:`` that
    # would also swallow KeyboardInterrupt and SystemExit.
    try:
        return n_adjective / (float(pylinguistObj.word_count) / 1000)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return 0
Esempio n. 19
0
def adjectiveIncidence(pylinguistObj):
    """Compute how many tokens tagged ``ADJ`` occur per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a list of
            ``(word, tag)`` pairs) and ``word_count``. When ``postag`` is
            empty it is replaced in place by
            ``tools.getPosTag(pylinguistObj)``.

    Returns:
        The adjective count divided by ``word_count / 1000`` as a float,
        or ``0`` when that division cannot be carried out (zero, missing
        or non-numeric ``word_count``).
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adjective_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] == "ADJ":
            adjective_total += 1

    # Only the division's own failure modes fall back to 0; a bare
    # ``except:`` would also hide interpreter-exit signals.
    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        incidence = adjective_total / thousands_of_words
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        incidence = 0

    return incidence
Esempio n. 20
0
    def text(self, text):
        """Load ``text`` into this object and compute its descriptive counts.

        Resets ``types``, ``tokens`` and ``postag``, stores the input, then
        fills ``tokens`` and ``postag`` through ``tools`` and the sentence
        and word statistics through ``descriptive``. Every helper receives
        ``self``, so the order of the assignments below matters.

        NOTE(review): ``self.text = text`` stores the input under the same
        attribute name as this method, so on this instance ``self.text``
        refers to the stored value afterwards — confirm this is intended
        (no decorator is visible on this definition).

        Args:
            text: Value stored as ``self.text`` and made available to the
                ``tools`` / ``descriptive`` helpers through ``self``.

        Returns:
            ``self``.
        """
        # Start from a clean state.
        self.types = []
        self.tokens = []
        self.postag = []

        self.text = text
        #.decode('utf-8','ignore')
        self.tokens = tools.getTokens(self)

        # Optimization: tag once up front (presumably so later consumers
        # find ``postag`` already populated — confirm against callers).
        self.postag = tools.getPosTag(self)

        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        #self.word_length = descriptive.word_length(self)                       #Doesn't work, for some reason
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self
Esempio n. 21
0
    def text(self, text):
        """Store ``text`` on this object and derive its basic statistics.

        Clears ``types``, ``tokens`` and ``postag``, keeps the input as
        ``self.text``, then populates ``tokens`` and ``postag`` via
        ``tools`` and the descriptive counters via ``descriptive``. All
        helpers are called with ``self``, so statement order is
        significant.

        NOTE(review): assigning ``self.text`` reuses this method's own
        name as an instance attribute; afterwards ``self.text`` on this
        instance is the stored value — confirm this is intended (no
        decorator is visible on this definition).

        Args:
            text: Value stored as ``self.text`` and made available to the
                ``tools`` / ``descriptive`` helpers through ``self``.

        Returns:
            ``self``.
        """
        # Reset state left over from any previous input.
        self.types=[]
        self.tokens=[]
        self.postag=[]

        self.text = text
        #.decode('utf-8','ignore')
        self.tokens = tools.getTokens(self)

        # Optimization: compute the POS tags once here (presumably so
        # later consumers can reuse them — confirm against callers).
        self.postag = tools.getPosTag(self)


        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        #self.word_length = descriptive.word_length(self)                       #Doesn't work, for some reason
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self