def ngramizer(proctxt, hr):
    """Collect NP chunks, paired with their npContext, from each clause of proctxt."""
    txt = []
    for s, sentence in enumerate(proctxt['chunksInClauses']):
        sent = []
        for c, clause in enumerate(sentence):
            nc = len(clause)
            inClause = [0] * nc
            claus = []
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for h, chunk in enumerate(clause):
                toks = chunk.tokens
                tags = chunk.tags
                pols = chunk.pols

                ntoks = len(chunk.tokens)

                if chunk.chunkType == 'NP':
                    if ntoks == 1 and tags[0] in ('O', 'X'):
                        continue
#                    tprops = chunk.tprops
#                    tc = Counter(tags)
                    claus.append((chunk, npContext(chunk)))
            sent.append(claus)
        txt.append(sent)
    return txt
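
A minimal sketch of the NP filter above (helper name and tag values are illustrative, not part of the library): only NP chunks are kept, and single-token chunks tagged 'O' or 'X' are skipped.

def keepNPChunk(chunkType, tags):
    # mirrors the ngramizer filter: NP chunks only, dropping one-token 'O'/'X' chunks
    return chunkType == 'NP' and not (len(tags) == 1 and tags[0] in ('O', 'X'))

print(keepNPChunk('NP', ['NN', 'NN']))  # True
print(keepNPChunk('NP', ['O']))         # False: single token tagged 'O'
print(keepNPChunk('VP', ['VB']))        # False: not an NP chunk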
Example #3
def negatedDomainNoun(procTxt, hr):
    """Collect NP chunks that contain a domain noun and are negated or carry negative polarity."""
    #    logger = sys.stdout.write
    domainNouns = hr.resources[RESKEY_DOMAIN_NOUNS]

    ndn = []
    for s, sentence in enumerate(procTxt[PTKEY_CHUNKEDCLAUSES]):
        for c, clause in enumerate(sentence):
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for h, chunk in enumerate(clause):
                if chPat[h] == 'NP':
                    hasDomainNoun = False
                    for t, tok in enumerate(chunk.tokens):
                        if tok in domainNouns:
                            hasDomainNoun = True
                            break

                    if hasDomainNoun:
                        if negn[h]:
                            ndn.append(chunk)
                        elif pols[h] < 0 and not negtd[h]:
                            ndn.append(chunk)
                        elif negtd[h]:
                            ndn.append(chunk)
    #                        logger('\n')
    return ndn
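
The three branches above reduce to a single predicate; a small standalone sketch (names are illustrative):

def isNegatedDomainChunk(pol, isNegator, isNegated):
    # a domain-noun chunk is collected if it negates, is negated, or carries negative polarity
    return bool(isNegator or isNegated or pol < 0)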
Example #4
def problemPhraseAnalysis(procTxt, hr):
    """Analyse each clause for problem phrases, dispatching on its verb-phrase structure."""

    procTxt = updateTokenAndChunkPropertiesPD(procTxt, hr)
    problems = []
    for s, sentence in enumerate(procTxt[PTKEY_CHUNKEDCLAUSES]):
        sentence_problem = []
        for c, clause in enumerate(sentence):
            clause_problem = defaultdict(list)
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause)
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            pols = [cmp(pol, 0) for pol in pols]
            clpol = [[p, int(n), int(t)] for p, n, t in zip(pols, negn, negtd)]
            if n_vp == 0:
                clause_problem = ppd_degenerateClause(clause, clpol, vpidx, hr)
                clause_problems = [clause_problem]
            elif n_vpfinite < 2:
                clause_problem = ppd_SVClause(clause, clpol, vpidx, hr)
                clause_problems = [clause_problem]
            else:
                clause_problems = ppd_MVClause(clause, clpol, vpidx, hr)
            sentence_problem.append(clause_problems)
        problems.append(sentence_problem)

    return problems
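
Within each clause, clpol pairs a chunk's polarity sign with its negation flags; Python 2's cmp(pol, 0) collapses the raw score to -1, 0, or +1. A worked example with made-up values:

pols = [2.5, 0.0, -0.7]
negn = [False, False, True]
negtd = [True, False, False]
pols = [cmp(pol, 0) for pol in pols]
clpol = [[p, int(n), int(t)] for p, n, t in zip(pols, negn, negtd)]
print(clpol)  # [[1, 0, 1], [0, 0, 0], [-1, 1, 0]]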
Example #6
def printErrorLogs(truLabels, mcPrd, procTxts, computedFeatures, truLbl=None, prdLbl=None, printer=None):

    hr = pickle.load(open(DEFAULT_HR_FILE))
    if not printer:
        printer = sys.stdout.write

    prdLabels = [mcp[MCKEY_LABEL] for mcp in mcPrd]
    prdScores = [mcp[MCKEY_SCORES] for mcp in mcPrd]

    # select indices of misclassified items according to the given label filter
    if truLbl and (not prdLbl):
        errIdx = [k for k, tru in enumerate(truLabels)
                    if tru == truLbl and prdLabels[k] != truLbl]
    elif (not truLbl) and (prdLbl):
        errIdx = [k for k, tru in enumerate(truLabels)
                    if prdLabels[k] == prdLbl and tru != prdLbl]
    else:
        errIdx = [k for k, prd in enumerate(prdLabels) if truLabels[k] == truLbl and prd != truLbl]

    errLog = [(k, truLabels[k], prdLabels[k]) for k in errIdx]
    errLog.sort(key = operator.itemgetter(1, 2))

    for item in errLog:
        k = item[0]
        printer('ID:%d\tTru:%s\tPrd:%s\n' % (item[0], item[1], item[2]))
        for key, val in prdScores[k].iteritems():
            printer('%s:%6.5f ' % (key, val))
        printer('\n')

        procTxt = procTxts[k]
        eb = computedFeatures[k]
        for tok, tag in zip(procTxt[PTKEY_TOKENS], procTxt[PTKEY_TAGS]):
            printer('%s/%s ' % (tok, tag))
        printer('\n')

        isq = questionsInProcTxt(procTxt, hr)
#        print isq

        chunkedSentences = procTxt[PTKEY_CHUNKEDCLAUSES]
        for s, chunkedSentence in enumerate(chunkedSentences):
            for c, clause in enumerate(chunkedSentence):
                chPat, pols, negn, negtd = clausePolarity(clause, hr, printer)
                printer('CLAUSE: %s\n' % clause)
                printer('POLS: %s\n' % pols)
                printer('NEGN: %s\n' % negn)
                printer('NEGTD: %s\n' % negtd)
                printer('isQ:%s\n' % isq[s])
                printer('-\n')

        for featureFuncName, features in eb.iteritems():
            for feature, val in features.iteritems():
                if val:
                    printer('%s %s\n' % (feature, val))
        printer('\n')
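
A self-contained sketch of the errIdx selection logic above (function and label values are illustrative):

def errorIndices(truLabels, prdLabels, truLbl=None, prdLbl=None):
    # items whose true label is truLbl but were predicted as something else
    if truLbl and not prdLbl:
        return [k for k, tru in enumerate(truLabels) if tru == truLbl and prdLabels[k] != truLbl]
    # items predicted as prdLbl whose true label differs
    if prdLbl and not truLbl:
        return [k for k, tru in enumerate(truLabels) if prdLabels[k] == prdLbl and tru != prdLbl]
    # fallback: same filter as the first case (empty when neither label is given)
    return [k for k, prd in enumerate(prdLabels) if truLabels[k] == truLbl and prd != truLbl]

print(errorIndices(['pos', 'neg', 'pos'], ['pos', 'pos', 'neg'], truLbl='pos'))  # [2]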
Example #8
def domainNounInSV(procTxt, hr):
    """Scan clauses with a single (finite) verb phrase and print those whose left-hand-side
    chunks mix positive and negative polarity; the domain-noun filtering below is commented
    out, so the returned lists stay empty."""
    rlhs = []
    rrhs = []
    rvp = []
    for s, sentence in enumerate(procTxt[PTKEY_CHUNKEDCLAUSES]):
        for c, clause in enumerate(sentence):
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause)

            if n_vp == 1 or n_vpfinite == 1:
                cpnn = [(ch, pols[k], negn[k], negtd[k])
                        for k, ch in enumerate(clause)]

                lhs = [q for q in cpnn[:vpidx]]
                rhs = [q for q in cpnn[vpidx + 1:]]

                lhspols = [item[1] for item in lhs]
                cc = Counter(lhspols)
                if cc[1] and cc[-1]:
                    print clause
                    print lhs
                    print vp
                    print rhs
                    print '----'


#
#                lhschunks = [q[0] for q in lhs]
#                rhschunks = [q[0] for q in rhs]
#
#                lhsDNidx =  hasDomainNoun(lhschunks, hr)
#                rhsDNidx = hasDomainNoun(rhschunks, hr)
#
#                rlhs = [lhs[k] for k in lhsDNidx]
#                rrhs = [rhs[k] for k in rhsDNidx]
#                rvp = [(vp, pols[vpidx], negn[vpidx], negtd[vpidx])]

#                if len(rlhs) > 1:
#                    print rlhs, rvp, rrhs

#
#
##                    if pols[vpidx] < 0:
##                        print [lhs[k] for k in lhsDN], vp, [rhs[k] for k in rhsDN]
#
#                print rlhs, rvp, rrhs
#                print clause
#                print '-----'
    return (rlhs, rrhs, rvp)
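
The Counter test above flags clauses whose left-hand-side chunks carry both a positive and a negative polarity; a tiny sketch with invented values:

from collections import Counter

def hasMixedPolarity(lhsPols):
    cc = Counter(lhsPols)
    return bool(cc[1] and cc[-1])  # both a +1 chunk and a -1 chunk present

print(hasMixedPolarity([1, 0, -1]))  # True
print(hasMixedPolarity([1, 1, 0]))   # False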
Example #10
def countPolarNGrams(procTxt, hr, featureVals={}, FKEY='countPolarNGrams'):
    if haskey(featureVals, FKEY): return featureVals

    try:
        procTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
    except (AttributeError, IndexError, KeyError):
        # token lexical properties not present yet; compute them
        procTxt = updateTokenLexicalProperties(procTxt, hr)

    count = {
        KEY_POLARITY_POSITIVE: {},
        KEY_POLARITY_NEGATIVE: {},
        KEY_POLARITY_NEUTRAL: {}
    }

    for k in count:
        for n in hr.resources[RESKEY_POLAR_NGRAMS].availableNgrams:
            count[k][n] = 0

    negation = hr.resources[RESKEY_NEGATORS].getDicts(1, KEY_NEGATION)

    for sentence in procTxt[PTKEY_CHUNKEDCLAUSES]:
        for clause in sentence:
            # clause-level polarity pattern, computed once per clause
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for chunk in clause:
                for k, tok in enumerate(chunk.tokens):
                    pol = pols[k]
                    if negtd[k]:
                        pol = pol * -1
                    pkey = __NumToPol__[pol]
                    n = len(tok.split('_NG_'))
                    count[pkey][n] += 1


#    for ng in aposng:
#        if isngToken(ng):
#            count[ng.polarity][ng.n] += 1

    featureVals[FKEY] = count
    return featureVals
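
A minimal, self-contained sketch of the counting step, assuming '_NG_' joins the words of an n-gram token and that polarities 1/-1/0 map to positive/negative/neutral keys (the names below are stand-ins for the library's constants):

NUM_TO_POL = {1: 'positive', -1: 'negative', 0: 'neutral'}

def countTokens(tokens, pols, negtd, availableNgrams=(1, 2, 3)):
    count = dict((p, dict((n, 0) for n in availableNgrams)) for p in NUM_TO_POL.values())
    for tok, pol, neg in zip(tokens, pols, negtd):
        if neg:
            pol = pol * -1              # negation flips the polarity
        n = len(tok.split('_NG_'))      # a joined n-gram encodes its own length
        count[NUM_TO_POL[pol]][n] += 1
    return count

print(countTokens(['great', 'not_NG_bad', 'phone'], [1, -1, 0], [0, 1, 0]))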
def entity_sentiment(ProcTxt, hr, sentiment_flag=1):

#        try:
#            ProcTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
#        except:
#            ProcTxt = updateTokenLexicalProperties(ProcTxt, hr)

        retvaldict = defaultdict(list)
        Keyword=[]
        total_toks=[]
        fo=0

        for sen in ProcTxt[PTKEY_CHUNKEDCLAUSES]:
            for bn,clause in enumerate(sen):
                cn=[]
                fo=1
                chPat, pols, negn, negtd = clausePolarity(clause, hr, None)
                pols_neg=[]
                for ind,act in enumerate(negtd):
                    if(act==1 and negn[ind]==0):
                        pols_neg.append(pols[ind]*-1)
                    else:
                        pols_neg.append(pols[ind])
                pols=pols_neg
                mn=[]
                n_vp, n_vpfinite, vpidx, lhs, vp, rhs=clauseVPAnalysis(clause)
                if ((n_vpfinite == 0 and n_vp == 1) or (n_vpfinite == 1)):
                    pols=single_verb(clause,pols,vpidx)
                ind_np=[]
                for c, chunk in enumerate(clause):
                    #logger(' %s' % (chunk))
                    tt=[]

                    if(chunk.chunkType=="NP"):
                        tt=extract_words(chunk,c,chPat,hr)
                        if(len(tt)!=0):
                            ind_np.append(c)
                            ft=" ".join(tt)
                            mn.extend([ft])
                            pols=current_chunkpolarity(c,clause,pols,chPat)
              

                if(len(ind_np)!=0):
                    for key_ind,ind in enumerate(mn):
                        ff=pols[ind_np[key_ind]]
                        gn=ind+pols_dict[ff]
                        cn.extend([gn])
                        retvaldict[ind].append(pols_dict[ff])
                Keyword.extend(cn)

        retval = []
        if(sentiment_flag==0):
            return(retvaldict.keys())
        else:
            #return(retvaldict)
            for k, v in retvaldict.iteritems():
                od = OrderedDict()
                od['entity'] = k
                od['sentiment'] = v
                retval.append(od) #{'aspect':k,'sentiment':v})
        return(retval)
def entity_sentiment(ProcTxt, hr, sentiment_flag=1):

#        try:
#            ProcTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
#        except:
#            ProcTxt = updateTokenLexicalProperties(ProcTxt, hr)

        retvaldict = defaultdict(list)
        #Keyword=[]
        #total_toks=[]
        fo=0

        for sen in ProcTxt[PTKEY_CHUNKEDCLAUSES]:
            for bn,clause in enumerate(sen):
                #cn=[]
                #fo=1
                chPat, pols, negn, negtd = clausePolarity(clause, hr, None)
                pols_neg=[]
                for ind,act in enumerate(negtd):
                    if(act==1 and negn[ind]==0):
                        pols_neg.append(pols[ind]*-1)
                    else:
                        pols_neg.append(pols[ind])
                pols=pols_neg
                mn=[]
                n_vp, n_vpfinite, vpidx, lhs, vp, rhs=clauseVPAnalysis(clause)
                if ((n_vpfinite == 0 and n_vp == 1) or (n_vpfinite == 1)):
                    pols=single_verb(clause,pols,vpidx)
                ind_np=[]
                for c, chunk in enumerate(clause):
                    #logger(' %s' % (chunk))
                    tt=[]

                    if(chunk.chunkType=="NP"):
                        tt=extract_words(chunk,c,chPat,hr)
                        if(len(tt)!=0):
                            ind_np.append(c)
                            ft=" ".join(tt)
                            mn.extend([ft])
                            pols=current_chunkpolarity(c,clause,pols,chPat)
              
                #print 'mn', mn
                if(len(ind_np)!=0):
                    for key_ind,ind in enumerate(mn):
                        #print 'ind', ind
                        ff=pols[ind_np[key_ind]]
                        phrase = clause[ind_np[key_ind]]
                        phrase = phrase.toktagstr()
                        #gn=ind+pols_dict[ff]
                        #cn.extend([gn])
                        key = '|'.join([ind, phrase])
                        #retvaldict[ind].append(pols_dict[ff]) #((pols_dict[ff], phrase))
                        retvaldict[key].append(pols_dict[ff])
                #Keyword.extend(cn)

        retval = []
        if(sentiment_flag==0):
            return(retvaldict.keys())
        else:
            #return(retvaldict)
            for k, v in retvaldict.iteritems():
                od = OrderedDict()
                k = k.split('|')
                od['entity'] = k[0]
                od['phrase'] = k[1]
                od['sentiment'] = v[0]
                retval.append(od) #{'aspect':k,'sentiment':v})
        return(retval)
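
Both versions above first adjust chunk polarities for negation: a polarity is flipped when the chunk is negated (negtd) but is not itself a negator (negn). A standalone sketch with made-up values:

def adjustPolarities(pols, negn, negtd):
    adjusted = []
    for ind, act in enumerate(negtd):
        if act == 1 and negn[ind] == 0:
            adjusted.append(pols[ind] * -1)  # negated, not a negator: flip
        else:
            adjusted.append(pols[ind])
    return adjusted

print(adjustPolarities([1, -1, 0], [0, 1, 0], [1, 1, 0]))  # [-1, -1, 0]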