def main(in_filename):
    doc = []
    str = "post"

    #this is done because of the different formats of the post and comment files
    if sys.argv[1].find(str) >= 0:
        colnames = [
            'postid', 'time', 'user', 'no1', 'no2', 'no3', 'no4', 'title',
            'post', 'now'
        ]
        posts = pandas.read_csv(sys.argv[1], names=colnames)
        num = posts.now.tolist()
        doc = posts.post.tolist()
    else:
        colnames = [
            'postid', 'commentid', 'time', 'user', 'no1', 'no2', 'no3',
            'comment', 'now'
        ]
        comments = pandas.read_csv(sys.argv[1], names=colnames)
        num = comments.now.tolist()
        doc = comments.comment.tolist()
        id = comments.commentid.tolist()

    index = 0
    all_postive_liwc_measures = []
    all_negative_liwc_measures = []
    print "Getting LIWC measures for ", sys.argv[1]
    liwc_lexicon = LIWCMeta.extract_liwc_features()
    for item in doc:
        if type(item) == float and np.isnan(item):
            item = ""
        outCountDict = LIWCMeta.getLex(item, liwc_lexicon)
        #creating an array of all positive category values for each post and adding them
        #creating an array of all negative category values for each post and adding them
        a = outCountDict['positive_affect']

        if (((outCountDict['article']) + (outCountDict['preposition']) +
             (outCountDict['pronoun']) + (outCountDict['conjunction']) +
             (outCountDict['adverbs']) + (outCountDict['negation']) +
             (outCountDict['auxiliary_verbs'])) / float(num[index])) > 1:
            all_postive_liwc_measures.append(1.0)
        else:
            all_postive_liwc_measures.append(
                ((outCountDict['article']) + (outCountDict['preposition']) +
                 (outCountDict['pronoun']) + (outCountDict['conjunction']) +
                 (outCountDict['adverbs']) + (outCountDict['negation']) +
                 (outCountDict['auxiliary_verbs'])) / float(num[index]))
        all_negative_liwc_measures.append(
            (outCountDict['pronoun']) / float(num[index]))
        index = index + 1

    # This is for all the reddit posts to get postid , positive and negative comments for each post
    if in_filename.find(str) >= 0:
        colnames = [
            'postid', 'Time', 'Author', 'Nocomments', 'upvotes', 'downvotes',
            'updown', 'title', 'commenttext'
        ]
        data = pandas.read_csv('combined_posts.csv', names=colnames)
        postid = list(data.postid)
        with open("combined_emotions_posts.csv", "w") as f:
            writer = csv.writer(f)
            writer.writerows(
                izip(postid, all_postive_liwc_measures,
                     all_negative_liwc_measures))

    # This is for all the reddit comments to get postid for each comment, positive and negative score for each comment
    else:
        colnames = [
            'postid', ' commentid', 'Time', 'Author', 'upvotes', 'downvotes',
            'updown', 'commenttext'
        ]
        data = pandas.read_csv('combined_comments.csv', names=colnames)
        postid = list(data.postid)
        with open("combined_emotions_comments.csv", "w") as f:
            writer = csv.writer(f)
            writer.writerows(
                izip(postid, all_postive_liwc_measures,
                     all_negative_liwc_measures))

        # Get the csv into lists get aggregate LIWC values for comments for each post
        colnames = ['postid', 'positive', 'negative']
        data = pandas.read_csv("combined_emotions_comments.csv",
                               names=colnames)
        postid = list(data.postid)
        positive = list(data.positive)
        negative = list(data.negative)
        unique_postid = []
        positive_unique_postid = []
        negative_unique_postid = []
        p1 = 0
        sum_positive = 0
        sum_negative = 0
        while (p1 < len(postid)):
            for p2 in range(p1, len(postid)):
                if postid[p1] == postid[p2]:
                    continue
                else:
                    unique_postid.append(postid[p1])
                    for i in range(p1, p2):
                        sum_positive += positive[i]
                        sum_negative += negative[i]
                    sum_positive = float(sum_positive) / (p2 - p1 + 1)
                    sum_negative = float(sum_negative) / (p2 - p1 + 1)
                    positive_unique_postid.append(sum_positive)
                    negative_unique_postid.append(sum_negative)
                    sum_positive = 0
                    sum_negative = 0
                    p1 = p2 - 1
                    break
            if p2 == len(postid) - 1:
                for i in range(p1, p2 + 1):
                    sum_positive += positive[i]
                    sum_negative += negative[i]
                positive_unique_postid.append(sum_positive)
                negative_unique_postid.append(sum_negative)
                unique_postid.append(postid[p1])
                break
            p1 = p1 + 1

        #write the average positive and negative LIWC values for all comments of a post into a CSV file
        with open("combined_emotions_comments_unique.csv", "w") as f:
            writer = csv.writer(f)
            writer.writerows(
                izip(unique_postid, positive_unique_postid,
                     negative_unique_postid))
def main(in_filename):
    doc = []
    str = "post";
    
    #this is done because of the different formats of the post and comment files
    if sys.argv[1].find(str) >=0:
        colnames = ['postid' , 'time', 'user','no1','no2','no3','no4' ,'title','post', 'now']
        posts = pandas.read_csv(sys.argv[1] ,names=colnames)
        num = posts.now.tolist()
        doc = posts.post.tolist()
    else:
        colnames = ['postid' , 'commentid','time', 'user','no1','no2','no3','comment','now']
        comments = pandas.read_csv(sys.argv[1],names=colnames)
        num = comments.now.tolist()
        doc = comments.comment.tolist()
        id = comments.commentid.tolist()

    index =0
    all_postive_liwc_measures = []
    all_negative_liwc_measures = []
    print "Getting LIWC measures for " , sys.argv[1]
    liwc_lexicon = LIWCMeta.extract_liwc_features()
    for item in doc:
        if type(item) == float and np.isnan(item):
            item = ""
        outCountDict = LIWCMeta.getLex(item, liwc_lexicon)
        #creating an array of all positive category values for each post and adding them
        #creating an array of all negative category values for each post and adding them
        a=outCountDict['positive_affect']

        if (((outCountDict['article'])+(outCountDict['preposition']) + (outCountDict['pronoun'])+ (outCountDict['conjunction'])+ (outCountDict['adverbs'])+ (outCountDict['negation'])+(outCountDict['auxiliary_verbs'])) /float(num[index])) > 1:
            all_postive_liwc_measures.append(1.0)
        else:
            all_postive_liwc_measures.append(((outCountDict['article'])+(outCountDict['preposition']) + (outCountDict['pronoun'])+ (outCountDict['conjunction'])+ (outCountDict['adverbs'])+ (outCountDict['negation'])+(outCountDict['auxiliary_verbs'])) /float(num[index]))
        all_negative_liwc_measures.append((outCountDict['pronoun'] )/float(num[index]))
        index = index + 1

    # This is for all the reddit posts to get postid , positive and negative comments for each post
    if in_filename.find(str) >=0:
        colnames = ['postid' , 'Time' , 'Author','Nocomments', 'upvotes' , 'downvotes' , 'updown' ,'title', 'commenttext']
        data = pandas.read_csv('combined_posts.csv' ,names=colnames)
        postid = list(data.postid)
        with open("combined_emotions_posts.csv","w") as f:
            writer = csv.writer(f)
            writer.writerows(izip(postid,all_postive_liwc_measures,all_negative_liwc_measures))

    # This is for all the reddit comments to get postid for each comment, positive and negative score for each comment
    else:
        colnames = ['postid' , ' commentid' , 'Time' , 'Author', 'upvotes' , 'downvotes' , 'updown' , 'commenttext']
        data = pandas.read_csv('combined_comments.csv' ,names=colnames)
        postid = list(data.postid)
        with open("combined_emotions_comments.csv","w") as f:
            writer = csv.writer(f)
            writer.writerows(izip(postid,all_postive_liwc_measures,all_negative_liwc_measures))

        # Get the csv into lists get aggregate LIWC values for comments for each post
        colnames = ['postid' , 'positive' ,'negative']
        data = pandas.read_csv("combined_emotions_comments.csv", names=colnames)
        postid= list(data.postid)
        positive = list(data.positive)
        negative = list(data.negative)
        unique_postid = []
        positive_unique_postid = []
        negative_unique_postid = []
        p1 =0
        sum_positive = 0
        sum_negative = 0
        while(p1<len(postid)):
            for p2 in range(p1,len(postid)):
                if postid[p1] == postid[p2]:
                    continue
                else:
                    unique_postid.append(postid[p1])
                    for i in range(p1,p2):
                        sum_positive += positive[i]
                        sum_negative += negative[i]
                    sum_positive = float(sum_positive) / (p2-p1+1)
                    sum_negative = float(sum_negative) / (p2-p1+1)
                    positive_unique_postid.append(sum_positive)
                    negative_unique_postid.append(sum_negative)
                    sum_positive = 0
                    sum_negative = 0
                    p1 = p2 -1 
                    break
            if p2 == len(postid) -1:
                for i in range(p1,p2+1):
                    sum_positive += positive[i]
                    sum_negative += negative[i]
                positive_unique_postid.append(sum_positive)
                negative_unique_postid.append(sum_negative)                
                unique_postid.append(postid[p1])
                break
            p1 = p1 + 1

        #write the average positive and negative LIWC values for all comments of a post into a CSV file
        with open("combined_emotions_comments_unique.csv","w") as f:
            writer = csv.writer(f)
            writer.writerows(izip(unique_postid,positive_unique_postid,negative_unique_postid))
from itertools import izip
import itertools
import pylab as pl

# Load every post's body, word count, and posting hour.
colnames = ['hour', 'post', 'num_words']

posts = pandas.read_csv('combined_post_hourly.csv', names=colnames)
post_body = posts.post.tolist()
num = posts.num_words.tolist()
hour = posts.hour.tolist()

# The LIWC category of interest is supplied as the first command-line
# argument; compute its count for each post, normalized by word count.
category1 = []
liwc_lexicon = LIWCMeta.extract_liwc_features()
for index, item in enumerate(post_body):  # enumerate replaces manual counter
    # pandas represents empty cells as NaN floats; treat them as empty text.
    if type(item) == float and np.isnan(item):
        item = ""
    outCountDict = LIWCMeta.getLex(item, liwc_lexicon)
    category1.append(outCountDict[sys.argv[1]] / float(num[index]))

# Accumulators for aggregating the LIWC values by day of the week.
# NOTE(review): this fragment is truncated here — the aggregation that
# fills these lists/counters continues in later code.
one = []
count_one = 0
two = []
count_two = 0
three = []
count_three = 0
four = []
from itertools import izip
import itertools
import pylab as pl

# Load every post's body, word count, and posting hour.
# NOTE(review): duplicate of the identical script fragment earlier in the
# file; the duplication should eventually be removed.
colnames = ['hour', 'post', 'num_words']

posts = pandas.read_csv('combined_post_hourly.csv', names=colnames)
post_body = posts.post.tolist()
num = posts.num_words.tolist()
hour = posts.hour.tolist()

# The LIWC category of interest is supplied as the first command-line
# argument; compute its count for each post, normalized by word count.
category1 = []
liwc_lexicon = LIWCMeta.extract_liwc_features()
for index, item in enumerate(post_body):  # enumerate replaces manual counter
    # pandas represents empty cells as NaN floats; treat them as empty text.
    if type(item) == float and np.isnan(item):
        item = ""
    outCountDict = LIWCMeta.getLex(item, liwc_lexicon)
    category1.append(outCountDict[sys.argv[1]] / float(num[index]))

# Accumulators for aggregating the LIWC values by day of the week.
# NOTE(review): this fragment is truncated here — the aggregation that
# fills these lists/counters continues in later code.
one = []
count_one = 0
two = []
count_two = 0
three = []
count_three = 0
four = []