from main.util.db import openDb import json from scipy import stats from main.util.common import SimpleDoc, powerset, Log import math import itertools from main.metrics.get_experts import metricNames db = openDb("stuff/localconnect.json") l = Log(filename="foo", verbose=True) topCategories = ['Biology', 'Medicine', 'Sports', 'Culture', 'Technology', 'Education', 'Health', 'Business', 'Belief', 'Humanities', 'Society', 'Life', 'Arts', 'Language', 'Law', 'History', 'Geography', 'Agriculture', 'Politics', 'Mathematics', 'Science', 'Nature', 'Environment', 'People', 'Chronology' ] outcomes = { } """for cats in topCategories: l.log("analyze categories: " + repr(cats)) cur = db.cursor() scores = { } for ind, metricName in zip(range(0, len(metricNames)), metricNames): pairs = [] for doc in docs: numExpertTweets = sum((1 for usr in doc[0] if not usr in patrickExperts)) metricScore = doc[1][ind]
# Baseline correlation script: for each altmetric, compute the Spearman rank
# correlation between a document's raw tweet count and its metric value.
# NOTE(review): newlines had been stripped from this chunk; statement
# boundaries below are reconstructed from Python syntax.
import itertools
import json
import math

from scipy import stats

from main.util.common import SimpleDoc, powerset, Log
from main.util.db import openDb

# One expert-topic name per line of the input file.
expertTopics = list(map(lambda s: s.strip(), open("data/expert_topics", "r")))

l = Log(filename="foo", verbose=True)

# Per document: [list of tweeting usernames, list of metric values].
# NOTE(review): `metrics` and `metricNames` are not imported in this chunk —
# presumably defined earlier in the file or via another import; verify.
# NOTE(review): Python 2 semantics assumed — map() must yield a *list* here
# because `docs` is iterated more than once below.
docs = map(lambda doc: [map(lambda tweet: tweet.username, doc.tweets),
                        map(lambda metric: metric[1](doc), metrics)],
           SimpleDoc.getallBetween((2012,6), (2012,8)))

# baseline[metricName] -> Spearman rho of (tweet count, metric score) pairs.
baseline = { }
for ind, metricName in zip(range(0, len(metricNames)), metricNames):
    pairs = []
    for doc in docs:
        numTweets = len(doc[0])
        metricScore = doc[1][ind]
        pairs.append([numTweets, metricScore])
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)
    baseline[metricName] = s

count = 0
count2 = 0

# Second pass over the metrics; loop body continues past this chunk.
for ind, metricName in zip(range(0, len(metricNames)), metricNames):
    pairs = []
# Expert-detection setup: users whose profile matches one of `expertWords`
# are treated as experts (via getWordExperts); category-based alternatives
# are kept below as commented-out options.
# NOTE(review): newlines had been stripped from this chunk; statement
# boundaries below are reconstructed from Python syntax.
import json
import math
import random

import numpy as np
from scipy import stats

from get_experts import (getPatrickExperts, metrics, metricNames, getBaseline,
                         minimizedDocs, getWordExperts, correlationWrtUsers,
                         corrComparision, remainNTweets, corrDiffs,
                         getListScore)
from main.util.common import (SimpleDoc, rankCorrelation, pearsonCorrelation,
                              allCorrelations, Log)

l = Log(filename="foo", verbose=True)

# Top-level subject categories used to bucket documents.
topCategories = ['Biology', 'Medicine', 'Sports', 'Culture', 'Technology',
                 'Education', 'Health', 'Business', 'Belief', 'Humanities',
                 'Society', 'Life', 'Arts', 'Language', 'Law', 'History',
                 'Geography', 'Agriculture', 'Politics', 'Mathematics',
                 'Science', 'Nature', 'Environment', 'People', 'Chronology']

# Mendeley's own discipline taxonomy (distinct from topCategories above).
mendeleyDisciplines = [
    'Linguistics', 'Economics', 'Psychology', 'Humanities',
    'Materials Science', 'Earth Sciences', 'Environmental Sciences',
    'Biological Sciences', 'Medicine', 'Mathematics', 'Chemistry', 'Physics',
    'Social Sciences', 'Electrical and Electronic Engineering',
    'Astronomy / Astrophysics / Space Science', 'Sports and Recreation',
    'Management Science / Operations Research', 'Philosophy', 'Law',
    'Business Administration', 'Engineering', 'Design',
    'Arts and Literature', 'Education', 'Computer and Information Science'
]

# Profile keywords that flag a user as a likely academic/expert; the trailing
# comment keeps the candidates that were tried and disabled.
expertWords = ["university", "ph.d", "ph. d", "ph d", "phd", "professor",
               "doctor", "dr.", "institute", "postdoc"
               ]# , "post doc", "student", "research", "prof", "post prad", "science", "scientist", "department", "study", "studies", "develop"]

# expertLists = list(map(lambda s: s.strip(), open("data/expert_topics", "r")))
# expertCategories = ['Medicine', 'Health' ]

wordExperts = getWordExperts(expertWords)
# patrickExperts = getPatrickExperts(expertCategories)
("Crossref", lambda doc: doc.numCrossrefs()), ("PubMed", lambda doc: doc.pubmedCitations), ("Scopus", lambda doc: doc.scopusCitations), ("Max Citations", lambda doc: doc.maxCitations()), ("PLOS pdf", lambda doc: doc.pdfViews), ("PLOS HTML", lambda doc: doc.htmlViews), ("PMC pdf", lambda doc: doc.pmcPdf), ("PMC HTML", lambda doc: doc.pmcHtml), ("Facebook Shares", lambda doc: doc.facebookShares), ("Facebook Comments", lambda doc: doc.facebookComments), ("Facebook Likes", lambda doc: doc.facebookLikes), ("Mendeley Readers", lambda doc: doc.mendeleyReaders), ("CiteULike", lambda doc: doc.citeULikeShares), ] l = Log(filename="foo", verbose=True) metricNames = map(lambda metric: metric[0], metrics) docs = map( lambda doc: [map(lambda tweet: tweet.username, doc.tweets), map(lambda metric: metric[1](doc), metrics)], SimpleDoc.getallBetween((2012, 6), (2012, 8)), ) negativeUsernames = map(lambda x: x[0], json.load(open("user_exclude_list_negative"))) usersTweetFrequence = {} for doc in docs: for user in doc[0]: usersTweetFrequence[user] = usersTweetFrequence.get(user, 0) + 1 lowTweetUsers = set(