def bio_change(dbname, colname, timename): db = dbt.db_connect_no_auth(dbname) com = db[colname] time = db[timename] filter = { 'liwc_anal.result.i': { '$exists': True }, 'new_liwc_anal.result.i': { '$exists': True } } cw, gw, all = 0, 0, 0 for user in com.find(filter): newtweet = time.find({ 'user.id': user['id'] }, no_cursor_timeout=True).sort([('id', -1)]).limit(1)[0] oldtweet = time.find({ 'user.id': user['id'] }, no_cursor_timeout=True).sort([('id', 1)]).limit(1)[0] newdes = newtweet['user']['description'] olddes = oldtweet['user']['description'] if newdes != olddes: all += 1 newbio = des_miner.process_text(newdes) oldbio = des_miner.process_text(olddes) if 'cw' in newbio and 'cw' in oldbio: if newbio['cw']['value'] != oldbio['cw']['value']: cw += 1 if 'gw' in newbio and 'gw' in oldbio: if newbio['gw']['value'] != oldbio['gw']['value']: gw += 1 print cw, gw, all
def bio_information(dbname='TwitterProAna', colname='users'):
    """Mine bio statistics from each user's current profile and from every
    entry in their polling history, inserting one record per snapshot into
    the 'bio' collection (duplicates are silently skipped).

    :param dbname: MongoDB database name.
    :param colname: collection of polled user documents.
    """
    com = dbt.db_connect_col(dbname, colname)
    bio_hist = dbt.db_connect_col(dbname, 'bio')
    bio_hist.create_index([('id', pymongo.ASCENDING)])
    for row in com.find({'screen_name': {'$exists': True}},
                        no_cursor_timeout=True):
        name, text = row['name'], row['description']
        date = row['lastPolledFull']
        # fix: 'stats' was unbound (NameError) when 'text' was empty/None
        stats = None
        if text and name:
            stats = dm.process_text(text, name)
        elif text:
            stats = dm.process_text(text)
        if stats:
            stats['date'] = date
            stats['id'] = row['id']
            try:
                bio_hist.insert(stats)
            except pymongo.errors.DuplicateKeyError:
                pass
        # fix: tolerate users without a 'history' field (was a KeyError)
        for hist in reversed(row.get('history', [])):
            # 'name'/'text' intentionally carry over from the previous
            # snapshot when a history entry omits them.
            if 'name' in hist:
                name = hist['name']
            if 'description' in hist:
                text = hist['description']
            # fix: reset 'stats'; otherwise a stale result from the previous
            # iteration was re-inserted whenever 'text' became empty
            stats = None
            if text:
                stats = dm.process_text(text, name)
            if stats:
                stats['date'] = hist['lastPolledFull']
                stats['id'] = row['id']
                try:
                    bio_hist.insert(stats)
                except pymongo.errors.DuplicateKeyError:
                    pass
def bio_change(dbname, colname, timename): db = dbt.db_connect_no_auth(dbname) com = db[colname] time = db[timename] filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}} cw, gw, all = 0, 0, 0 for user in com.find(filter): newtweet = time.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', -1)]).limit(1)[0] oldtweet = time.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1)[0] newdes = newtweet['user']['description'] olddes = oldtweet['user']['description'] if newdes != olddes: all += 1 newbio = des_miner.process_text(newdes) oldbio = des_miner.process_text(olddes) if 'cw' in newbio and 'cw' in oldbio: if newbio['cw']['value'] != oldbio['cw']['value']: cw += 1 if 'gw' in newbio and 'gw' in oldbio: if newbio['gw']['value'] != oldbio['gw']['value']: gw += 1 print cw, gw, all
def variable_change(dbname, comname, oldtimename, newtimename): db = dbt.db_connect_no_auth(dbname) com = db[comname] oldtime = db[oldtimename] newtime = db[newtimename] oldfollower, newfollower, oldfollowee, newfollowee, users, liwcs, olddate, newdate, \ oldcw, newcw, oldgw, newgw, oldage, newage, newcbmi, oldcbmi, newgbmi, oldgbmi = \ [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [] # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}} filter = { '$or': [{ 'liwc_anal.result.i': { '$exists': True } }, { 'new_liwc_anal.result.i': { '$exists': True } }] } # full analysis variables: # meta_keys = ['WC', 'WPS', 'Sixltr', 'Dic'] # category_keys = ['funct', 'pronoun', 'ppron', 'i', 'we', 'you', 'shehe', # 'they', 'ipron', 'article', 'verb', 'auxverb', 'past', 'present', 'future', # 'adverb', 'preps', 'conj', 'negate', 'quant', 'number', 'swear', 'social', # 'family', 'friend', 'humans', 'affect', 'posemo', 'negemo', 'anx', 'anger', # 'sad', 'cogmech', 'insight', 'cause', 'discrep', 'tentat', 'certain', # 'inhib', 'incl', 'excl', 'percept', 'see', 'hear', 'feel', 'bio', 'body', # 'health', 'sexual', 'ingest', 'relativ', 'motion', 'space', 'time', 'work', # 'achieve', 'leisure', 'home', 'money', 'relig', 'death', 'assent', 'nonfl', # 'filler'] # puncuation_keys = [ # 'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam', # 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 'AllPct'] # allcates = meta_keys + category_keys + puncuation_keys allcates = ['posemo', 'negemo', 'anx', 'anger', 'sad'] for user in com.find(filter): users.append(user['id']) # print user['id'] """LIWC variables""" oldliwc = user['liwc_anal']['result'] newliwc = user['new_liwc_anal']['result'] if newliwc is None: newliwc = {} if oldliwc == None: oldliwc = {} ols = [oldliwc.get(key, None) for key in allcates] nls = [newliwc.get(key, None) for key in allcates] liwcs.append(ols + nls) '''Follower and Followee variables''' # oldtweet = 
time.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1)[0] oldtweets = oldtime.find({ 'user.id': user['id'] }, no_cursor_timeout=True).sort([('id', -1) ]).limit(1) if oldtweets.count() == 0: oldtweets = newtime.find({ 'user.id': user['id'] }, no_cursor_timeout=True).sort([('id', 1) ]).limit(1) oldtweet = oldtweets[0] oldprofile = oldtweet['user'] newtweets = newtime.find({ 'user.id': user['id'] }, no_cursor_timeout=True).sort([('id', -1) ]).limit(1) if newtweets.count() == 0: newtweet = oldtweet newprofile = oldprofile else: newtweet = newtweets[0] newprofile = newtweet['user'] olddate.append(oldtweet['created_at']) newdate.append(newtweet['created_at']) newbio = des_miner.process_text(newprofile['description']) oldbio = des_miner.process_text(oldprofile['description']) oldcw.append(oldbio.get('cw', {}).get('value', None)) newcw.append(newbio.get('cw', {}).get('value', None)) oldgw.append(oldbio.get('gw', {}).get('value', None)) newgw.append(newbio.get('gw', {}).get('value', None)) oldage.append(oldbio.get('a', {}).get('value', None)) newage.append(newbio.get('a', {}).get('value', None)) oldcbmi.append(oldbio.get('cbmi', {}).get('value', None)) newcbmi.append(newbio.get('cbmi', {}).get('value', None)) oldgbmi.append(oldbio.get('gbmi', {}).get('value', None)) newgbmi.append(newbio.get('gbmi', {}).get('value', None)) oldfollower.append(oldprofile['followers_count']) newfollower.append(newprofile['followers_count']) oldfollowee.append(oldprofile['friends_count']) newfollowee.append(newprofile['friends_count']) """Out put Profile variables""" print len(liwcs) newliwccol = ['Old' + key for key in allcates] oldliwccol = ['New' + key for key in allcates] df = pd.DataFrame(data=liwcs, columns=newliwccol + oldliwccol) df['UserID'] = users df['OldFollower'] = oldfollower df['NewFollower'] = newfollower df['OldFollowee'] = oldfollowee df['NewFollowee'] = newfollowee df['OldDate'] = olddate df['NewDate'] = newdate df['OldCW'] = oldcw df['NewCW'] = 
newcw df['OldGW'] = oldgw df['NewGW'] = newgw df['OldAge'] = oldage df['NewAge'] = newage df['OldCBMI'] = oldcbmi df['NewCBMI'] = newcbmi df['OldGBMI'] = oldgbmi df['NewGBMI'] = newgbmi g1 = gt.load_network_subset(dbname, 'net', {'scraped_times': 2}) g2 = gt.load_network_subset(dbname, 'net', {'scraped_times': 130}) gt.summary(g1) gt.summary(g2) oldindegree_map = dict(zip(g1.vs['name'], g1.indegree())) oldoutdegree_map = dict(zip(g1.vs['name'], g1.outdegree())) oldpagerank_map = dict(zip(g1.vs['name'], g1.pagerank())) oldbetweenness_map = dict(zip(g1.vs['name'], g1.betweenness())) newindegree_map = dict(zip(g2.vs['name'], g2.indegree())) newoutdegree_map = dict(zip(g2.vs['name'], g2.outdegree())) newpagerank_map = dict(zip(g2.vs['name'], g2.pagerank())) newbetweenness_map = dict(zip(g2.vs['name'], g2.betweenness())) df['OldIndegree'] = [oldindegree_map.get(str(uid), 0) for uid in users] df['NewIndegree'] = [newindegree_map.get(str(uid), 0) for uid in users] df['OldOutdegree'] = [oldoutdegree_map.get(str(uid), 0) for uid in users] df['NewOutdegree'] = [newoutdegree_map.get(str(uid), 0) for uid in users] df['OldPagerank'] = [oldpagerank_map.get(str(uid), 0.0) for uid in users] df['NewPagerank'] = [newpagerank_map.get(str(uid), 0.0) for uid in users] df['OldBetweenness'] = [ oldbetweenness_map.get(str(uid), 0.0) for uid in users ] df['NewBetweenness'] = [ newbetweenness_map.get(str(uid), 0.0) for uid in users ] df.to_csv(dbname + '.csv')
def variable_change(dbname, comname, oldtimename, newtimename): db = dbt.db_connect_no_auth(dbname) com = db[comname] oldtime = db[oldtimename] newtime = db[newtimename] oldfollower, newfollower, oldfollowee, newfollowee, users, liwcs, olddate, newdate, \ oldcw, newcw, oldgw, newgw, oldage, newage, newcbmi, oldcbmi, newgbmi, oldgbmi = \ [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [] # filter = {'liwc_anal.result.i':{'$exists':True}, 'new_liwc_anal.result.i':{'$exists':True}} filter = {'$or': [{'liwc_anal.result.i':{'$exists':True}}, {'new_liwc_anal.result.i':{'$exists':True}}]} # full analysis variables: # meta_keys = ['WC', 'WPS', 'Sixltr', 'Dic'] # category_keys = ['funct', 'pronoun', 'ppron', 'i', 'we', 'you', 'shehe', # 'they', 'ipron', 'article', 'verb', 'auxverb', 'past', 'present', 'future', # 'adverb', 'preps', 'conj', 'negate', 'quant', 'number', 'swear', 'social', # 'family', 'friend', 'humans', 'affect', 'posemo', 'negemo', 'anx', 'anger', # 'sad', 'cogmech', 'insight', 'cause', 'discrep', 'tentat', 'certain', # 'inhib', 'incl', 'excl', 'percept', 'see', 'hear', 'feel', 'bio', 'body', # 'health', 'sexual', 'ingest', 'relativ', 'motion', 'space', 'time', 'work', # 'achieve', 'leisure', 'home', 'money', 'relig', 'death', 'assent', 'nonfl', # 'filler'] # puncuation_keys = [ # 'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam', # 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 'AllPct'] # allcates = meta_keys + category_keys + puncuation_keys allcates = ['posemo', 'negemo', 'anx', 'anger', 'sad'] for user in com.find(filter): users.append(user['id']) # print user['id'] """LIWC variables""" oldliwc = user['liwc_anal']['result'] newliwc = user['new_liwc_anal']['result'] if newliwc is None: newliwc = {} if oldliwc == None: oldliwc = {} ols = [oldliwc.get(key, None) for key in allcates] nls = [newliwc.get(key, None) for key in allcates] liwcs.append(ols+nls) '''Follower and Followee variables''' # oldtweet = time.find({'user.id': 
user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1)[0] oldtweets = oldtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', -1)]).limit(1) if oldtweets.count() == 0: oldtweets = newtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', 1)]).limit(1) oldtweet = oldtweets[0] oldprofile = oldtweet['user'] newtweets = newtime.find({'user.id': user['id']}, no_cursor_timeout=True).sort([('id', -1)]).limit(1) if newtweets.count() == 0: newtweet = oldtweet newprofile = oldprofile else: newtweet = newtweets[0] newprofile = newtweet['user'] olddate.append(oldtweet['created_at']) newdate.append(newtweet['created_at']) newbio = des_miner.process_text(newprofile['description']) oldbio = des_miner.process_text(oldprofile['description']) oldcw.append(oldbio.get('cw', {}).get('value', None)) newcw.append(newbio.get('cw', {}).get('value', None)) oldgw.append(oldbio.get('gw', {}).get('value', None)) newgw.append(newbio.get('gw', {}).get('value', None)) oldage.append(oldbio.get('a', {}).get('value', None)) newage.append(newbio.get('a', {}).get('value', None)) oldcbmi.append(oldbio.get('cbmi', {}).get('value', None)) newcbmi.append(newbio.get('cbmi', {}).get('value', None)) oldgbmi.append(oldbio.get('gbmi', {}).get('value', None)) newgbmi.append(newbio.get('gbmi', {}).get('value', None)) oldfollower.append(oldprofile['followers_count']) newfollower.append(newprofile['followers_count']) oldfollowee.append(oldprofile['friends_count']) newfollowee.append(newprofile['friends_count']) """Out put Profile variables""" print len(liwcs) newliwccol = ['Old'+key for key in allcates] oldliwccol = ['New'+key for key in allcates] df = pd.DataFrame(data=liwcs, columns=newliwccol+oldliwccol) df['UserID'] = users df['OldFollower'] = oldfollower df['NewFollower'] = newfollower df['OldFollowee'] = oldfollowee df['NewFollowee'] = newfollowee df['OldDate'] = olddate df['NewDate'] = newdate df['OldCW'] = oldcw df['NewCW'] = newcw df['OldGW'] = oldgw 
df['NewGW'] = newgw df['OldAge'] = oldage df['NewAge'] = newage df['OldCBMI'] = oldcbmi df['NewCBMI'] = newcbmi df['OldGBMI'] = oldgbmi df['NewGBMI'] = newgbmi g1 = gt.load_network_subset(dbname, 'net', {'scraped_times': 2}) g2 = gt.load_network_subset(dbname, 'net', {'scraped_times': 130}) gt.summary(g1) gt.summary(g2) oldindegree_map = dict(zip(g1.vs['name'], g1.indegree())) oldoutdegree_map = dict(zip(g1.vs['name'], g1.outdegree())) oldpagerank_map = dict(zip(g1.vs['name'], g1.pagerank())) oldbetweenness_map = dict(zip(g1.vs['name'], g1.betweenness())) newindegree_map = dict(zip(g2.vs['name'], g2.indegree())) newoutdegree_map = dict(zip(g2.vs['name'], g2.outdegree())) newpagerank_map = dict(zip(g2.vs['name'], g2.pagerank())) newbetweenness_map = dict(zip(g2.vs['name'], g2.betweenness())) df['OldIndegree'] = [oldindegree_map.get(str(uid), 0) for uid in users] df['NewIndegree'] = [newindegree_map.get(str(uid), 0) for uid in users] df['OldOutdegree'] = [oldoutdegree_map.get(str(uid), 0) for uid in users] df['NewOutdegree'] = [newoutdegree_map.get(str(uid), 0) for uid in users] df['OldPagerank'] = [oldpagerank_map.get(str(uid), 0.0) for uid in users] df['NewPagerank'] = [newpagerank_map.get(str(uid), 0.0) for uid in users] df['OldBetweenness'] = [oldbetweenness_map.get(str(uid), 0.0) for uid in users] df['NewBetweenness'] = [newbetweenness_map.get(str(uid), 0.0) for uid in users] df.to_csv(dbname+'.csv')