Exemple #1
0
def keywords_recovery_preed():
    # compare keywords in pro-recovery and pro-ed users' tweets
    prorec, proed = edrelatedcom.rec_proed()
    times = dbt.db_connect_col('fed', 'timeline')
    fdist_rec = FreqDist()
    fdist_ped = FreqDist()
    for user in prorec:
        for tweet in times.find({'user.id':int(user)}):
            text = tweet['text'].encode('utf8')
            # replace RT, @, # and Http://
            text = text.strip().lower()
            text = re.sub(r"(?:(rt\ ?@)|@|https?://)\S+", "", text) # replace RT @, @ and http://
            words = keywords(text)
            for word in words:
                fdist_rec[word] += 1
    for user in proed:
        for tweet in times.find({'user.id':int(user)}):
            text = tweet['text'].encode('utf8')
            # replace RT, @, # and Http://
            text = text.strip().lower()
            text = re.sub(r"(?:(rt\ ?@)|@|https?://)\S+", "", text) # replace RT @, @ and http://
            words = keywords(text)
            for word in words:
                fdist_ped[word] += 1
    print fdist_rec.most_common(50)
    print fdist_ped.most_common(50)
Exemple #2
0
def compare_opinion():
    # Compre pro-recovery and pro-ed users in terms of interventions
    prorec, proed = edrelatedcom.rec_proed() ## based on profiles
    rec_times = dbt.db_connect_col('fed', 'recover')
    # afinn = Afinn(emoticons=True)
    rec_sen, ed_sen = [], []
    for i in xrange(2):
        users = [prorec, proed][i]
        for uid in users:
            textmass = ''
            for tweet in rec_times.find({'user.id': int(uid)}):
                if 'retweeted_status' in tweet:
                    continue
                elif 'quoted_status' in tweet:
                    continue
                else:
                    text = tweet['text'].encode('utf8')
                    text = text.strip().lower()
                    text = re.sub(r"(?:(rt\ ?@)|@|https?://)\S+", "", text) # replace RT @, @ and http://
                    textmass += " " + text
            # sent = afinn.score(textmass)
            sent = sentiment(textmass)[0]
            if sent>50:
                print uid
            [rec_sen, ed_sen][i].append(sent)
    sns.distplot(rec_sen, hist=False, label='Pro-recovery')
    sns.distplot(ed_sen, hist=False, label='Pro-ED')
    plt.show()
Exemple #3
0
def split_treatment():
    """Split pro-recovery users' timelines at their first treatment mention.

    Each user's tweets from 'fed'/'timeline' are visited in ascending ``id``
    order.  Tweets seen before the first original (non-retweet, non-quote)
    tweet mentioning 'treatment', 'therap' or 'doctor' are copied into
    'fed'/'prior_treat'; that tweet and every later one are copied into
    'fed'/'post_treat'.  Both target collections get a (user.id, id) index
    and a unique index on ``id`` before any tweet is inserted.
    """
    rec, _unused_proed = edrelatedcom.rec_proed()  # groups are based on profiles
    times = dbt.db_connect_col('fed', 'timeline')

    prior = dbt.db_connect_col('fed', 'prior_treat')
    prior.create_index([('user.id', pymongo.ASCENDING),
                        ('id', pymongo.DESCENDING)])
    prior.create_index([('id', pymongo.ASCENDING)], unique=True)

    post = dbt.db_connect_col('fed', 'post_treat')
    post.create_index([('user.id', pymongo.ASCENDING),
                       ('id', pymongo.DESCENDING)])
    post.create_index([('id', pymongo.ASCENDING)], unique=True)

    for uid in rec:
        mentioned = False  # latches to True at the first treatment mention
        # sort direction: 1 = ascending, -1 = descending
        ordered = times.find({'user.id': int(uid)}).sort([('id', 1)])
        for tweet in ordered:
            is_original = not ('retweeted_status' in tweet
                               or 'quoted_status' in tweet)
            if is_original:
                text = tweet['text'].encode('utf8')
                # Remove "RT @name", "@name" and http(s) URL tokens.
                text = re.sub(r"(?:(RT\ ?@)|@|https?://)\S+", "", text)
                text = text.strip().lower()
                if any(term in text
                       for term in ('treatment', 'therap', 'doctor')):
                    mentioned = True
            # Retweets/quotes never flip the flag but are still routed by it.
            target = post if mentioned else prior
            target.insert(tweet)
Exemple #4
0
def compare_weights():
    """Plot CW and GW value distributions, one figure per user group.

    For the pro-recovery group and then the pro-ED group, the values stored
    under 'text_anal.cw.value' and 'text_anal.gw.value' in 'fed'/'scom' are
    fetched for the group's users (only documents where the field exists)
    and drawn as two density curves on one figure, which is then shown.
    """
    prorec, proed = edrelatedcom.rec_proed()  # groups are based on profiles
    for group in (prorec, proed):
        fetched = []
        for field in ('text_anal.cw.value', 'text_anal.gw.value'):
            query = {'id_str': {'$in': group},
                     field: {'$exists': True}}
            fetched.append(
                iot.get_values_one_field('fed', 'scom', field, query))
        cw, gw = fetched
        sns.distplot(cw, hist=False, label='CW')
        sns.distplot(gw, hist=False, label='GW')
        # Show inside the loop so each group gets its own figure.
        plt.show()
Exemple #5
0
def test():
    # times = dbt.db_connect_col('fed', 'treat')
    # for tweet in times.find():
    #     print ' '.join(tweet['text'].split())

    #    text = """
 # The cause of eating disorders is not clear.[3] Both biological and environmental factors appear to play a role.[1][3] Cultural idealization of thinness is believed to contribute.[3] Eating disorders affect about 12 percent of dancers.[4] Those who have experienced sexual abuse are also more likely to develop eating disorders.[5] Some disorders such as pica and rumination disorder occur more often in people with intellectual disabilities. Only one eating disorder can be diagnosed at a given time.[2]
 #
 #               """
 #    print keywords(text)

    users = iot.get_values_one_field('fed', 'recover', 'user.id')
    prerec , proed = edrelatedcom.rec_proed()
    pusers = [str(i) for i in users]
    print len(pusers)
    print len(prerec)
    uoi = (set(pusers).intersection(set(prerec)))
    com = dbt.db_connect_col('fed', 'scom')
    for u in uoi:
        user = com.find_one({'id': int(u)})
        print user['screen_name']
Exemple #6
0
def recovery_user_treatment_tweet():
    # verify when 'treatment' in pro-recovery users timeline
    rec, proed = edrelatedcom.rec_proed() ## based on profiles
    times = dbt.db_connect_col('fed', 'timeline')
    count = 0
    for user in rec:
        flag = False
        for tweet in times.find({'user.id': int(user)}):
            if 'retweeted_status' in tweet:
                continue
            elif 'quoted_status' in tweet:
                continue
            else:
                text = tweet['text'].encode('utf8')
                text = re.sub(r"(?:(RT\ ?@)|@|https?://)\S+", "", text) # replace RT @, @ and http://
                text = text.strip().lower()
                if 'treatment' in text or 'therap' in text \
                       or 'doctor' in text:
                    print ' '.join(tweet['text'].split())
                    flag = True
        if flag:
            count += 1
            print user
    print len(rec), count
Exemple #7
0
def recover_proed_inter():
    # Compare difference between pro-ed and pro-recovery uses in social networking
    prorec, proed = edrelatedcom.rec_proed() ## based on profiles
    com = dbt.db_connect_col('fed', 'scom')
    g = gt.load_network('fed', 'snet')
    data = []

    for node in g.vs:
        uid = node['name']
        user = com.find_one({'id': int(uid)})
        followeecount = user['friends_count']
        followercount = user['followers_count']
        followees = set([g.vs[v]['name'] for v in g.successors(uid)])
        followers = set([g.vs[v]['name'] for v in g.predecessors(uid)])
        recc_followee, proc_followee, edc_followee = 0.0, 0.0, 0.0
        for u in followees:
            if u in prorec:
                recc_followee += 1
            elif u in proed:
                proc_followee += 1
            else:
                edc_followee += 1
        if followeecount != 0:
            recc_followee /= followeecount
            proc_followee /= followeecount
            edc_followee /= followeecount
        else:
            print 'Followee number is zero', uid
        otherc_followee = 1 - recc_followee - proc_followee - edc_followee

        recc_follower, proc_follower, edc_follower = 0.0, 0.0, 0.0
        for u in followers:
            if u in prorec:
                recc_follower += 1
            elif u in proed:
                proc_follower += 1
            else:
                edc_follower += 1
        if followercount != 0:
            recc_follower /= followercount
            proc_follower /= followercount
            edc_follower /= followercount
        else:
            print 'Follower number is zero', uid
        otherc_follower = 1 - recc_follower - proc_follower - edc_follower

        if uid in prorec:
            data.append(['Rec', recc_followee, 'Rec-Followees'])
            data.append(['Rec', proc_followee, 'Ped-Followees'])
            data.append(['Rec', edc_followee, 'ED-Followees'])
            data.append(['Rec', otherc_followee, 'Oth-Followees'])

            data.append(['Rec', recc_follower, 'Rec-Followers'])
            data.append(['Rec', proc_follower, 'Ped-Followers'])
            data.append(['Rec', edc_follower, 'ED-Followers'])
            data.append(['Rec', otherc_follower, 'Oth-Followers'])

        elif uid in proed:
            data.append(['Ped', proc_followee, 'Ped-Followees'])
            data.append(['Ped', recc_followee, 'Rec-Followees'])
            data.append(['Ped', edc_followee, 'ED-Followees'])
            data.append(['Ped', otherc_followee, 'Oth-Followees'])

            data.append(['Ped', proc_follower, 'Ped-Followers'])
            data.append(['Ped', recc_follower, 'Rec-Followers'])
            data.append(['Ped', edc_follower, 'ED-Followers'])
            data.append(['Ped', otherc_follower, 'Oth-Followers'])
        else:
            pass
    df = pd.DataFrame(data, columns=['Group', 'Proportion', 'Feature'])
    df.to_csv('inter.csv')
    sns.set(style="whitegrid", palette="pastel", color_codes=True)
    sns.boxplot(x="Feature", y="Proportion", hue="Group", data=df, palette="PRGn")
    sns.despine(offset=10, trim=True)
    # plt.ylim(0, 0.8)
    # sns.violinplot(x="Feature", y="Values", hue="Group", data=df, split=True,
    #            inner="quart", palette="PRGn")
    # sns.despine(left=True)
    plt.show()