def publication_years():
    # Histogram of publication years over the whole corpus.
    plt.figure(num=None, figsize=(8, 4), dpi=80, facecolor='w', edgecolor='k')
    publicationYears = list(simpleDoc.publicationDatetime().year
                            for simpleDoc in SimpleDoc.getallBetween(None, None))
    histDisc(plt, publicationYears, width=0.5)
    # plt.savefig(figurePath("publication_years.png"))
    plt.tight_layout()
    plt.show()
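# `histDisc` is not defined in this excerpt. A minimal sketch of what it is
# assumed to do, i.e. draw a bar chart with one bar per distinct integer
# value; the name and signature follow the call above, the body is a guess.
def histDisc(plot, values, width=0.5):
    from collections import Counter
    counts = Counter(values)                    # value -> frequency
    xs = sorted(counts.keys())
    plot.bar([x - width / 2.0 for x in xs],     # center each bar on its value
             [counts[x] for x in xs], width=width)
    plot.xticks(xs)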
def tweetsBySpecificUserCorrelations():
    # Correlate, per paper, the number of tweets by a single user (ATP_CME)
    # with the paper's average citation count.
    docs = SimpleDoc.getallBetween((2012, 6), (2012, 8))
    pairs = []
    for doc in docs:
        numTweets = len(filter(lambda tweet: tweet.username == "ATP_CME", doc.tweets))
        citations = doc.averageCitations()
        pairs.append([numTweets, citations])
    x, y = zip(*pairs)
    print allCorrelations(x, y)
    plt.scatter(x, y)
    plt.show()
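# `allCorrelations` is not shown either. It presumably bundles the usual
# correlation coefficients for a pair of samples; a sketch under that
# assumption (each scipy call returns a (coefficient, p-value) pair):
def allCorrelations(x, y):
    from scipy import stats
    return {
        "pearson": stats.pearsonr(x, y),
        "spearman": stats.spearmanr(x, y),
        "kendall": stats.kendalltau(x, y),
    }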
(("pmcViews", ()), (2011, 6), "PMC views"), (("maxCitations", ()), (2009, 3), "Citations") ] attributeNames = map(lambda x: x[0][0], attributeList) attributePrintNames = map(lambda x: x[2], attributeList) calls = map(lambda x: x[0], attributeList) stats = [] for ind, attr in zip(range(0, len(attributeList)), attributeList): call = attr[0] lowerBound = attr[1] attName = attr[0][0] valuesForMetric = filter(lambda x: x != None, map(lambda doc: applyCall(doc, call), SimpleDoc.getallBetween(lowerBound, None) )) minV, maxV, meanV, std = min(valuesForMetric), max(valuesForMetric), np.mean(valuesForMetric), np.std(valuesForMetric) stats.append((attName, call, meanV, std, len(valuesForMetric))) print attName + "\t" + "\t".join(map(lambda x: str(x), [minV, maxV, meanV, std])) statValues = [] for stat in stats: name = stat[0] call = stat[1] mean = stat[2] std = stat[3] numValues = stat[4]
# expertCategories = ['Medicine', 'Health']
wordExperts = getWordExperts(expertWords)
# patrickExperts = getPatrickExperts(expertCategories)

# Restriction to 'Biological Sciences' papers (disabled):
"""bioDocs = minimizedDocs(
    filter(
        lambda doc: doc.mendeleyDisciplines != None
                    and 'Biological Sciences' in doc.mendeleyDisciplines,
        SimpleDoc.getallBetween((2012, 6), (2012, 8))
    ),
    metrics
)"""
docs = minimizedDocs(SimpleDoc.getallBetween((2012, 6), (2012, 8)), metrics)

usersInTimewindow = set(usr for doc in docs for usr in doc[0])
totalNumTweets = sum(1 for doc in docs for u in doc[0])

# Baseline computation for increasing tweet counts (disabled):
"""f = open("baselines", "w")
for numTweets in range(100, totalNumTweets, 100):
    print str(numTweets) + " / " + str(totalNumTweets)
    baseline = getBaseline(docs, metricNames, numTweets)
    f.write(json.dumps({"num-tweets": numTweets, "baseline": baseline}) + "\n")
    f.flush()
f.close()"""
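# Neither `minimizedDocs` nor `metrics` is defined in this excerpt. Judging
# from the usage above and the inline equivalent in the next fragment
# (doc[0] = tweeting usernames, doc[1] = metric scores), `minimizedDocs`
# likely reduces each SimpleDoc to those two lists; a sketch:
def minimizedDocs(documents, metrics):
    return [[[tweet.username for tweet in doc.tweets],
             [metricFun(doc) for _, metricFun in metrics]]
            for doc in documents]

# `metrics` is presumably a list of (name, scoring function) pairs, given
# the metric[1](doc) calls below; a hypothetical example built from
# attributes that appear elsewhere in this code:
metrics = [
    ("citations", lambda doc: doc.averageCitations()),
    ("pmcViews", lambda doc: doc.pmcViews()),
]
metricNames = map(lambda m: m[0], metrics)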
import json
import math
import itertools

from main.util.db import openDb
from scipy import stats
from main.util.common import SimpleDoc, powerset, Log

expertTopics = map(lambda s: s.strip(), open("data/expert_topics", "r"))
l = Log(filename="foo", verbose=True)

# Reduce each document to its tweeting usernames and its metric scores.
docs = map(lambda doc: [map(lambda tweet: tweet.username, doc.tweets),
                        map(lambda metric: metric[1](doc), metrics)],
           SimpleDoc.getallBetween((2012, 6), (2012, 8)))

# Baseline: Spearman correlation between a document's total number of
# tweets and each metric.
baseline = {}
for ind, metricName in enumerate(metricNames):
    pairs = []
    for doc in docs:
        numTweets = len(doc[0])
        metricScore = doc[1][ind]
        pairs.append([numTweets, metricScore])
    x, y = zip(*pairs)
    s, p = stats.spearmanr(x, y)
    baseline[metricName] = s

count = 0
count2 = 0
for ind, metricName in enumerate(metricNames):
    pairs = []
import random

def canBeEncoded(text):
    # Only keep strings that survive a plain str() conversion, so they can
    # be written to an ASCII output file.
    try:
        str(text)
        return True
    except UnicodeEncodeError:
        return False

def tweetsBetweenDay(documents, lowerBound, upperBound):
    # All tweets posted between lowerBound and upperBound days after the
    # corresponding paper's publication.
    return [[tweet.text, tweet.timestamp, tweet.username,
             doc.doi, doc.title, doc.publicationTimestamp]
            for doc in documents
            for tweet in doc.tweets
            if (lowerBound * 60 * 60 * 24) <= (tweet.timestamp - doc.publicationTimestamp) <= (upperBound * 60 * 60 * 24)
               and canBeEncoded(tweet.text) and canBeEncoded(doc.title)]

relevantDocuments = SimpleDoc.getallBetween((2012, 6), (2012, 8))

# Stratified sample over the age of the paper at tweet time. Note that
# random.sample raises ValueError if a stratum holds fewer tweets than
# the requested sample size.
tweets = []
tweets.extend(random.sample(tweetsBetweenDay(relevantDocuments, 0, 1), 111))
tweets.extend(random.sample(tweetsBetweenDay(relevantDocuments, 1, 3), 111))
tweets.extend(random.sample(tweetsBetweenDay(relevantDocuments, 3, 5), 111))
tweets.extend(random.sample(tweetsBetweenDay(relevantDocuments, 7, 30), 333))
tweets.extend(random.sample(tweetsBetweenDay(relevantDocuments, 100, 300), 333))

# One tab-separated line per tweet, shuffled; write the first 333 lines.
tweetTexts = map(lambda tweetdata: "\t".join([str(tweetdata[0]), str(tweetdata[1]),
                                              tweetdata[2], tweetdata[3],
                                              tweetdata[4], str(tweetdata[5])]),
                 tweets)
random.shuffle(tweetTexts)

f = open("tweetTexts_1.txt", "w")
for text in tweetTexts[0:333]:
    f.write(text.replace("\n", " ").replace("\"", "").replace("'", "") + "\n")
f.close()
# Tail of a disabled (triple-quoted) block whose opening lies above this
# fragment; it duplicates the per-attribute summary loop from earlier.
valuesForMetric = filter(lambda x: x != None,
                         map(lambda doc: applyCall(doc, call),
                             SimpleDoc.getallBetween(lowerBound, None)))
minV, maxV, meanV, std = (min(valuesForMetric), max(valuesForMetric),
                          np.mean(valuesForMetric), np.std(valuesForMetric))
print attName + "\t" + "\t".join(map(lambda x: str(x), [minV, maxV, meanV, std]))
"""

cat = "Biological Sciences"
# Restriction to one Mendeley discipline (disabled):
"""consideredDocs = filter(
    lambda doc: doc.mendeleyDisciplines != None and cat in doc.mendeleyDisciplines,
    SimpleDoc.getallBetween((2012, 6), (2012, 8))
)"""
consideredDocs = SimpleDoc.getallBetween((2012, 6), (2012, 8))
print len(consideredDocs)

matrix = getAttributeValueMatrix(consideredDocs, calls)
corrs = correlationBetweenEverything(matrix, attributeNames)

# Dumping the pairwise correlations to disk (disabled):
"""f = open("foo", "w")
for corr in corrs:
    f.write(corr.toJson() + "\n")
f.close()"""
# corrs = CorrelationItem.fromFile("stuff/pairwise_corr_2012-6_2012-8.json")

f = open("foo", "w")
m = []
for a1 in attributeNames:
    row = []
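# `getAttributeValueMatrix` and `correlationBetweenEverything` are not shown
# in this excerpt. A sketch of what they plausibly do (the real version
# apparently returns CorrelationItem objects with a toJson() method; plain
# tuples are used here for brevity):
def getAttributeValueMatrix(documents, calls):
    # One row per document, one column per attribute.
    return [[applyCall(doc, call) for call in calls] for doc in documents]

def correlationBetweenEverything(matrix, names):
    from scipy import stats as scipystats
    columns = zip(*matrix)  # one tuple of values per attribute
    results = []
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            # Keep only documents where both attributes are present.
            pairs = [(a, b) for a, b in zip(columns[i], columns[j])
                     if a is not None and b is not None]
            x, y = zip(*pairs)
            s, p = scipystats.spearmanr(x, y)
            results.append((names[i], names[j], s, p))
    return results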