Beispiel #1
0
def category(request, name):
    """Render the ``category.html`` page for a single category.

    The category is looked up in ``CATEGORIES`` under
    ``CATEGORY_NAME_PREFIX + name``.  The template context holds the
    category, its parents and children (ordered by ``display``), the
    changes and annotations collected from all of its ``chao`` components,
    weekly timelines of both, per-author activity counts and a link into
    the network view centred on the category's position.

    Raises:
        Http404: if the lookup yields nothing (or a falsy value).
    """
    from data_categories import CATEGORIES

    node = CATEGORIES.get(CATEGORY_NAME_PREFIX + name)
    if not node:
        raise Http404

    parents = node.parents.order_by('display')
    children = node.children.order_by('display')

    changes, annotations = [], []
    try:
        for component in node.chao.all():
            changes.extend(
                component.changes.filter(Change.relevant_filter).order_by('-timestamp'))
            annotations.extend(
                component.annotations.filter(Annotation.relevant_filter).order_by('-created'))
    except OntologyComponent.DoesNotExist:
        # Discard anything collected so far and show an empty history.
        changes = annotations = []

    def change_day(change):
        # Changes without a timestamp have no date.
        if change.timestamp:
            return change.timestamp.date()
        return None

    def annotation_day(annotation):
        # Annotations without a creation time have no date.
        if annotation.created:
            return annotation.created.date()
        return None

    timeline_changes = get_weekly(
        changes, change_day, to_ordinal=True,
        min_date=MIN_CHANGES_DATE, max_date=MAX_CHANGES_DATE)
    timeline_annotations = get_weekly(
        annotations, annotation_day, to_ordinal=True,
        min_date=MIN_CHANGES_DATE, max_date=MAX_CHANGES_DATE)

    # Count activity per author id, then list the most active authors first.
    activity = counts(entry.author_id for entry in changes + annotations)
    ranked = sorted(activity.iteritems(), key=lambda pair: pair[1], reverse=True)
    # The label is the author id without its leading len(settings.INSTANCE)
    # characters.
    authors = [
        {'label': author_id[len(settings.INSTANCE):], 'data': total}
        for author_id, total in ranked
    ]

    x, y = node.get_pos(settings.DEFAULT_LAYOUT)
    network_base = reverse('icd.views.network')
    network_url = network_base + '#x=%f&y=%f&z=2' % (x, y)

    context = {
        'category': node,
        'parents': parents,
        'children': children,
        'changes': changes,
        'annotations': annotations,
        'network_url': network_url,
        'timeline_changes': timeline_changes,
        'timeline_annotations': timeline_annotations,
        'authors': authors,
    }
    return render_to_response('category.html', context,
                              context_instance=RequestContext(request))
Beispiel #2
0
import util
import matplotlib.pyplot as plt
import mysql.connector

def main():
    """Scatter-plot the counts of two keywords against each other.

    Connects to the local ``weibo`` MySQL database, searches posts for the
    keywords "人传人" and "钟南山", buckets each result set with
    ``util.counts`` between 2020-01-20 and 2020-01-25 using frequency
    ``'H'``, and saves a scatter plot of the two ``"counts"`` series to
    ``images/scatter_.png``.
    """
    connection = mysql.connector.connect(host="127.0.0.1",
                                         port=3307,
                                         user='******',
                                         password='******',
                                         db='weibo')
    try:
        cur = connection.cursor()

        # NOTE(review): the returned hot words are not used by this script;
        # the call is kept so the script still fails early when
        # data/result.json cannot be processed -- confirm whether it is
        # needed at all.
        util.get_top_hot_words("data/result.json", True, True, True, 10)

        search_result = util.search_by_single_keyword(cur, "人传人")
        freq1 = util.counts(search_result, "2020-01-20 00:00:00",
                            "2020-01-25 00:00:00", 'H')
        search_result = util.search_by_single_keyword(cur, "钟南山")
        freq2 = util.counts(search_result, "2020-01-20 00:00:00",
                            "2020-01-25 00:00:00", "H")
    finally:
        # Always release the database connection, even if a query fails.
        connection.close()

    plt.scatter(freq1["counts"], freq2["counts"])
    plt.savefig("images/scatter_.png")


if __name__ == "__main__":
    main()
Beispiel #3
0
def _assert_counts_at_least(counts, lower_bounds):
    """Assert ``counts[i] >= lower_bounds[i]`` for every listed bound."""
    for index, bound in enumerate(lower_bounds):
        assert counts[index] >= bound, (
            "counts[%d] = %r is below the expected minimum %r"
            % (index, counts[index], bound))


def _assert_counts_at_most(counts, upper_bounds):
    """Assert ``counts[i] <= upper_bounds[i]`` for every listed bound."""
    for index, bound in enumerate(upper_bounds):
        assert counts[index] <= bound, (
            "counts[%d] = %r is above the expected maximum %r"
            % (index, counts[index], bound))


def test_count():
    """Check ``util.counts`` against known per-bucket counts in the database.

    Covers a single keyword (mode ``"IGNORE"``), the ``"AND"`` and ``"OR"``
    combinations of two keywords, and an unknown mode, which must yield an
    empty ``"counts"`` sequence.  Lower bounds are used because the
    assertions only require counts to be at least the recorded values.
    """
    # Minimum counts per bucket for "人传人", starting 2020-01-21 00:00.
    human_transmission_min = [3104, 1273, 588, 356, 303, 483, 906, 2037,
                              2519, 2587, 2072]
    # Recorded counts per bucket for "一级响应", starting 2020-01-23 22:00.
    level_one_response = [67, 172, 128, 82, 20, 9, 11, 8, 10, 32, 45, 40,
                          45, 33, 148, 191, 180, 192]
    # Minimum counts per bucket for "浙江" AND "一级响应".
    zhejiang_and_response_min = [52, 78, 62, 49, 11, 7, 5, 6, 7, 26, 34,
                                 36, 41, 27, 121, 161, 134, 162]
    # Minimum counts per bucket for "浙江" OR "广东".
    zhejiang_or_guangdong_min = [103, 197, 146, 96, 31, 19, 17, 16, 21]

    connection = mysql.connector.connect(host="127.0.0.1",
                                         port=3307,
                                         user='******',
                                         password='******',
                                         db='weibo')
    try:
        cur = connection.cursor()

        result = util.counts(cur, "人传人", "", "IGNORE",
                             "2020-01-21 00:00:00", "2020-01-25 00:00:00",
                             'H')
        _assert_counts_at_least(result["counts"], human_transmission_min)

        result = util.counts(cur, "一级响应", "", "IGNORE",
                             "2020-01-23 22:00:00", "2020-01-25 00:00:00",
                             'H')
        _assert_counts_at_least(result["counts"], level_one_response)

        result = util.counts(cur, "浙江", "一级响应", "AND",
                             "2020-01-23 22:00:00", "2020-01-25 00:00:00",
                             'H')
        _assert_counts_at_least(result["counts"], zhejiang_and_response_min)
        # The AND result is capped by the recorded counts for "一级响应"
        # alone over the same window.
        _assert_counts_at_most(result["counts"], level_one_response)

        result = util.counts(cur, "浙江", "广东", "OR",
                             "2020-01-23 22:00:00", "2020-01-25 00:00:00",
                             'H')
        _assert_counts_at_least(result["counts"], zhejiang_or_guangdong_min)

        # An unknown combination mode must produce no counts at all.
        result = util.counts(cur, "浙江", "广东", "ERRORTEST",
                             "2020-01-23 22:00:00", "2020-01-25 00:00:00",
                             'H')
        assert len(result["counts"]) == 0
    finally:
        # Release the connection even when an assertion above fails.
        connection.close()
Beispiel #4
0
def category(request, name):
    """Render the ``category.html`` page for one category of an instance.

    Args:
        request: The incoming request; it selects the instance through
            ``get_instance`` and is used to build the ``RequestContext``.
        name: Category name taken from the URL.  A name starting with
            ``"http:/"`` but not ``"http://"`` has its first ``"http:/"``
            replaced by ``"http://"`` before the lookup.

    Returns:
        The response produced by ``render_to_response`` for
        ``category.html``.

    Raises:
        Http404: if the instance has no (truthy) category under ``name``.
    """
    INSTANCE = get_instance(request)
    test = ""
    if name.startswith("http:/") and not name.startswith("http://"):
        # NOTE(review): presumably the double slash was collapsed before
        # the name reached this view -- confirm.  ``test`` tells the
        # template that the repair happened.
        test = "i do parse wrong"
        name = name.replace("http:/", "http://", 1)
    # Debug output kept from the original implementation.
    print(name)
    category = data_categories.CATEGORIES[INSTANCE].get(name)
    if not category:
        raise Http404

    parents = category.parents.order_by('display')
    children = category.children.order_by('display')
    try:
        changes = []
        annotations = []
        for chao in category.chao.all():
            # NOTE(review): changes use the per-instance filter while
            # annotations use the class-level one -- confirm this is
            # intended.
            changes.extend(
                chao.changes.filter(relevant_filter(INSTANCE)).order_by('-timestamp'))
            annotations.extend(
                chao.annotations.filter(Annotation.relevant_filter).order_by('-created'))
    except OntologyComponent.DoesNotExist:
        # Discard anything collected so far and show an empty history.
        changes = annotations = []

    tag_activity = [
        {'label': 'Primary Community Changes', 'data': category.metrics.primary_tag_changes},
        {'label': 'Secondary Community Changes', 'data': category.metrics.secondary_tag_changes},
        {'label': 'Involved Community Changes', 'data': category.metrics.involved_tag_changes},
        {'label': 'WHO Team Community Changes', 'data': category.metrics.who_tag_changes},
        {'label': 'Outside Community Changes', 'data': category.metrics.outside_tag_changes},
    ]

    timeline_changes = get_weekly(
        changes, lambda c: c.timestamp.date() if c.timestamp else None, to_ordinal=True,
        min_date=data.MIN_CHANGES_DATE[INSTANCE], max_date=data.MAX_CHANGES_DATE[INSTANCE])
    timeline_annotations = get_weekly(
        annotations, lambda a: a.created.date() if a.created else None, to_ordinal=True,
        min_date=data.MIN_CHANGES_DATE[INSTANCE], max_date=data.MAX_CHANGES_DATE[INSTANCE])

    author_counts = counts(entry.author_id for entry in changes + annotations)
    # Use loop names that differ from ``name``: under Python 2 a list
    # comprehension leaks its loop variables into the function scope, so
    # reusing ``name`` here replaced the category name passed to the
    # template below with the last author id.
    authors = [
        {'label': author_id[len(INSTANCE):], 'data': total}
        for author_id, total in sorted(author_counts.iteritems(),
                                       key=lambda pair: pair[1], reverse=True)
    ]

    titles = category.category_titles.all()
    definitions = category.category_definitions.all()
    involved_tags = category.involved_tags.all()

    x, y = category.get_pos(settings.DEFAULT_LAYOUT)
    network_url = reverse('icd.views.network') + '#x=%f&y=%f&z=2' % (x, y)

    return render_to_response('category.html', {
        'instance': INSTANCE,
        'name': name,
        'test': test,
        'category': category,
        'parents': parents,
        'children': children,
        'changes': changes,
        'annotations': annotations,
        'network_url': network_url,
        'timeline_changes': timeline_changes,
        'timeline_annotations': timeline_annotations,
        'authors': authors,
        'titles': titles,
        'tag_activity': tag_activity,
        'definitions': definitions,
        'involved_tags': involved_tags,
    }, context_instance=RequestContext(request))
Beispiel #5
0
# Pull the 'pvals' entries out of the primary and follow-up data sets.
pvals_pri = primary_data.flat[0]['pvals']
pvals_fol = followup_data.flat[0]['pvals']

# Extract raw data for permutation testing
rvs_a_pri = primary_data.flat[0]['rvs_a']
rvs_b_pri = primary_data.flat[0]['rvs_b']

rvs_a_fol = followup_data.flat[0]['rvs_a']
rvs_b_fol = followup_data.flat[0]['rvs_b']

# Define analysis parameters.
# The primary 'pvals' array is four-dimensional; the last extent is unused.
n_iterations, n_effect_sizes, nl, _ = np.shape(pvals_pri)
emph_primary = 0.1
alpha = 0.05
method = qvalue
sl = 30  # TODO: save to .npy file.

# Compute reproducibility rates: one value per (iteration, effect size).
rr = np.zeros((n_iterations, n_effect_sizes))

for ind in np.ndindex(n_iterations, n_effect_sizes):
    print('Analysis iteration %3d' % (1 + ind[0]))
    primary_flat = pvals_pri[ind].flatten()
    followup_flat = pvals_fol[ind].flatten()
    replicable = repl(primary_flat, followup_flat, emph_primary, method,
                      alpha)
    # Restore the nl-by-nl layout before counting.
    replicable = np.reshape(replicable, (nl, nl))
    # Only the first element of the result of counts() is stored.
    rr[ind] = counts(replicable, nl, sl)[0]

# Save data to disk; the file name carries the name of the method used.
output_fpath = fpath
output_fname = output_fpath + '/result-%s.npy' % method.__name__

np.save(output_fname, {'rr': rr})
print('Results saved to disk.')