def post_published_for_bright_mirror_participant(
        request, test_session, discussion, admin_user, participant1_user,
        bright_mirror):
    """Fixture: a PUBLISHED fiction Post by participant1 on the bright mirror idea.

    The post is linked to the idea through an IdeaRelatedPostLink created by
    the admin user.  A pytest finalizer removes both rows again.
    """
    from assembl.models import (
        Post, Idea, LangString, IdeaRelatedPostLink, PublicationStates)
    from graphene.relay import Node

    # bright_mirror is a graphene global id; resolve it to the DB Idea.
    idea = Idea.get(int(Node.from_global_id(bright_mirror)[1]))
    published_post = Post(
        discussion=discussion,
        creator=participant1_user,
        subject=LangString.create(u"Published by participant"),
        body=LangString.create(u"A simple published fiction by participant"),
        type='post',
        publication_state=PublicationStates.PUBLISHED,
        message_id="*****@*****.**",
        creation_date=datetime.utcnow())
    idea_link = IdeaRelatedPostLink(
        idea=idea, creator=admin_user, content=published_post)
    test_session.add(published_post)
    test_session.add(idea_link)
    test_session.flush()

    def fin():
        print("finalizer post_published_for_bright_mirror")
        test_session.delete(published_post)
        test_session.delete(idea_link)
        test_session.flush()

    request.addfinalizer(fin)
    return published_post
def post_draft_for_bright_mirror(
        request, test_session, discussion, moderator_user, bright_mirror):
    """Fixture: a week-old DRAFT fiction Post by the moderator on the bright mirror idea.

    The post is linked to the idea through an IdeaRelatedPostLink; a pytest
    finalizer removes both rows again.
    """
    from assembl.models import (
        Post, Idea, LangString, IdeaRelatedPostLink, PublicationStates)
    from graphene.relay import Node

    # bright_mirror is a graphene global id; resolve it to the DB Idea.
    idea = Idea.get(int(Node.from_global_id(bright_mirror)[1]))
    draft_post = Post(
        discussion=discussion,
        creator=moderator_user,
        subject=LangString.create(u"Draft"),
        body=LangString.create(u"A simple draft fiction"),
        type='post',
        publication_state=PublicationStates.DRAFT,
        message_id="*****@*****.**",
        # Backdated a week so ordering/age-dependent queries see an old draft.
        creation_date=datetime.utcnow() - timedelta(days=7))
    idea_link = IdeaRelatedPostLink(
        idea=idea, creator=moderator_user, content=draft_post)
    test_session.add(draft_post)
    test_session.add(idea_link)
    test_session.flush()

    def fin():
        print("finalizer post_draft_for_bright_mirror")
        test_session.delete(draft_post)
        test_session.delete(idea_link)
        test_session.flush()

    request.addfinalizer(fin)
    return draft_post
def test_get_long_title_on_idea(graphql_request, idea_in_thread_phase):
    """GraphQL query on an Idea node returns its longTitle alongside title."""
    from graphene.relay import Node
    from assembl.models import Idea

    # idea_in_thread_phase is the "What you need to know" idea (global id).
    database_id = int(Node.from_global_id(idea_in_thread_phase)[1])
    idea = Idea.get(database_id)
    idea.long_title = u'What you need to know'
    idea.db.flush()

    result = schema.execute(
        u"""
query Idea($lang: String!, $id: ID!) {
  idea: node(id: $id) {
    ... on Idea {
      title(lang: $lang)
      longTitle
      description(lang: $lang)
      imgUrl
    }
  }
}
""",
        context_value=graphql_request,
        variable_values={
            "id": idea_in_thread_phase,
            "lang": u'en',
        })

    expected = {
        u'idea': {
            u'title': u'Understanding the dynamics and issues',
            u'longTitle': u'What you need to know',
            u'description': u'',
            u'imgUrl': None,
        }
    }
    # Round-trip through json to normalize OrderedDicts before comparing.
    assert json.loads(json.dumps(result.data)) == expected
def extract_voters(request):
    """Export every vote of a vote widget as a CSV download, one row per vote.

    Columns start with the voter's identity, the vote date and the voted
    proposition, then grow dynamically: one column per token category
    (token votes) or per gauge tick/choice (gauge votes).  Rows are sorted
    by contributor name.

    :param request: Pyramid request; context must resolve to the vote widget.
    :return: a csv_response (attachment "detailed_vote_results.csv").
    :raises HTTPUnauthorized: if the requester is anonymous, or if the vote
        session is still running and the requester is not a discussion admin.
    """
    extract_votes = []
    ctx = request.context
    user_id = request.authenticated_userid
    if not user_id:
        raise HTTPUnauthorized()
    widget = ctx._instance
    # While the vote session is still open, restrict the export to
    # discussion administrators.
    if widget.activity_state != "ended":
        permissions = get_permissions(user_id, ctx.get_discussion_id())
        if P_ADMIN_DISC not in permissions:
            raise HTTPUnauthorized()
    user_prefs = LanguagePreferenceCollection.getCurrent()
    fieldnames = [
        "Nom du contributeur",
        "Nom d'utilisateur du contributeur",
        "Adresse mail du contributeur",
        "Date/heure du vote",
        "Proposition",
    ]
    # NOTE: '== None' is a SQLAlchemy column expression, not a Python
    # comparison — do not rewrite as 'is None'.
    votes = widget.db.query(AbstractIdeaVote).filter(
        AbstractVoteSpecification.widget_id == widget.id).filter(
        AbstractIdeaVote.tombstone_date == None).order_by(
        AbstractIdeaVote.vote_spec_id.desc()).all()
    for count, vote in enumerate(votes):
        voter = vote.voter
        contributor = voter.real_name() or u""
        contributor_username = voter.username_p or u""
        contributor_mail = voter.get_preferred_email() or u""
        vote_date = vote.vote_date or u""
        proposition = Idea.get(
            vote.idea_id).title.best_lang(user_prefs).value or u""
        vote_value = vote.vote_value
        # Insert a blank spacer column between groups of vote specifications.
        # NOTE(review): for count == 0 this compares with votes[-1] (the last
        # vote); kept as-is to preserve the existing column layout.
        if votes[count].vote_spec_id != votes[
                count - 1].vote_spec_id and fieldnames[-1] != " ":
            fieldnames.append(" ")
        extract_info = {
            "Nom du contributeur": contributor.encode('utf-8'),
            "Nom d'utilisateur du contributeur":
                contributor_username.encode('utf-8'),
            "Adresse mail du contributeur": contributor_mail.encode('utf-8'),
            "Date/heure du vote": str(vote_date),
            "Proposition": proposition.encode('utf-8'),
        }
        if vote.type == u'token_idea_vote':
            token_category = vote.token_category.name.best_lang(
                user_prefs).value or u""
            token_category_encoded = token_category.encode('utf-8')
            if token_category_encoded not in fieldnames:
                fieldnames.append(token_category_encoded)
            # Bugfix: key the row on the *encoded* category name so it matches
            # the fieldname appended above; with the unencoded unicode key,
            # non-ASCII category columns came out empty in the CSV.
            extract_info.update({token_category_encoded: str(vote_value)})
            extract_votes.append(extract_info)
        if vote.type == u'gauge_idea_vote':
            spec = vote.vote_spec
            if isinstance(spec, NumberGaugeVoteSpecification):
                # Numeric gauge: one column per tick, "1" marks the chosen one.
                for choice_value in range_float(
                        spec.minimum, spec.maximum, spec.nb_ticks):
                    option = u"{} {}".format(
                        choice_value, spec.unit).encode('utf-8')
                    if option not in fieldnames:
                        fieldnames.append(option)
                    extract_info.update(
                        {option: "1" if vote_value == choice_value else "0"})
            else:
                # Text gauge: one column per labelled choice.
                for choice in spec.get_choices():
                    option = choice.label.best_lang(
                        user_prefs).value.encode('utf-8')
                    if option not in fieldnames:
                        fieldnames.append(option)
                    extract_info.update(
                        {option: "1" if vote_value == choice.value else "0"})
            extract_votes.append(extract_info)
    extract_votes.sort(key=operator.itemgetter('Nom du contributeur'))
    return csv_response(
        extract_votes, CSV_MIMETYPE, fieldnames,
        content_disposition='attachment; filename="detailed_vote_results.csv"')
def as_html(discussion, f=None, min_samples=4):
    """Write the clustering results of a discussion as a crude HTML report.

    :param discussion: the Discussion whose cluster results are rendered.
    :param f: a writable file object; if falsy, 'output.html' is opened.
        NOTE(review): the file is returned but never closed here — the
        caller owns closing it.
    :param min_samples: forwarded to get_all_results.
    :return: the file object that was written to.
    """
    if not f:
        f = open('output.html', 'w')
    results = get_all_results(discussion, min_samples=min_samples)
    # Reshape {idea_id: (score, cmp, clusters, post_info)} into tuples with
    # the silhouette score first, so the sort below orders by score, desc.
    results = [(
        silhouette_score, idea_id, compare_with_ideas, clusters, post_info)
        for idea_id, (silhouette_score, compare_with_ideas, clusters, post_info)
        in results.iteritems()]
    results.sort(reverse=True)
    f.write("<html><body>")
    for (silhouette_score, idea_id, compare_with_ideas, clusters, post_info
         ) in results:
        if idea_id:
            idea = Idea.get(idea_id)
            f.write("<h1>Idea %d: [%f] %s</h1>\n" % (
                idea_id, silhouette_score or 0,
                (idea.short_title or '').encode('utf-8')))
        else:
            # A falsy idea_id stands for the whole-discussion result.
            f.write("<h1>Discussion %s</h1>" % discussion.topic.encode('utf-8'))
        if len(clusters) > 1:
            # The last clusters entry is the remainder (unclustered posts).
            f.write("<p><b>Cluster size: %s</b>, remainder %d</p>\n" % (
                ', '.join((str(len(ci['cluster'])) for ci in clusters[:-1])),
                len(clusters[-1]['cluster'])))
        if (compare_with_ideas):
            f.write("<dl>\n")
            for k, v in compare_with_ideas.iteritems():
                f.write("<dt>%s</dt><dd>%s</dd>\n" % (k, v))
            f.write("</dl>\n")
        # All idea ids mentioned by any cluster's idea_scores.
        children_ids = set(chain(*(
            cli['idea_scores'].keys() for cli in clusters)))
        # Count, per child idea, how many of these posts are attached to it.
        post_counts_per_idea = {
            child_id: len([post_id for (post_id, pinfo)
                           in post_info.iteritems()
                           if child_id in pinfo['ideas']])
            for child_id in children_ids}
        for n, cluster_info in enumerate(clusters):
            # The remainder pseudo-cluster carries no 'features' key.
            is_remainder = 'features' not in cluster_info
            cluster = cluster_info['cluster']
            features = cluster_info.get('features', {})
            idea_scores = cluster_info['idea_scores']
            if is_remainder:
                f.write("<h2>Remainder:</h2>\n<ol>")
            else:
                f.write("<h2>Cluster %d</h2>\n<ol>" % (n,))
            for idea_id, score in idea_scores.iteritems():
                idea = Idea.get(idea_id)
                f.write("<li>Idea %d: %d/%d %s</li>\n" % (
                    idea_id, score, post_counts_per_idea[idea_id],
                    (idea.short_title or '').encode('utf-8')))
            f.write("</ol>\n")
            if features:
                # features[0]/features[1]: presumably terms characteristic
                # for/against this cluster — TODO confirm with calc_features.
                f.write("<p><b>Positive:</b> %s</p>\n" % (
                    u", ".join(features[0])).encode('utf-8'))
                f.write("<p><b>Negative:</b> %s</p>\n" % (
                    u", ".join(features[1])).encode('utf-8'))
            f.write("<dl>\n")
            # One definition-list entry per post: id + idea ids, then text.
            for post_id in cluster:
                f.write("<dt>Post %d (%s):</dt>\n" % (
                    post_id, ','.join((
                        str(p) for p in post_info[post_id]['ideas']))))
                f.write("<dd>%s</dd>" % (
                    post_info[post_id]['text'].encode('utf-8')))
            f.write("</dl>\n")
    f.write("</body></html>")
    return f
def get_cluster_info(
        discussion_id, idea_id=None, num_topics=200, passes=5,
        silhouette_cutoff=0.05, algorithm="DBSCAN", **algo_kwargs):
    """Cluster the posts of a discussion (or one idea's posts) in topic space.

    Posts are mapped through tf-idf and an LSI topic model, then clustered
    with the named sklearn.cluster algorithm (DBSCAN by default).

    :param discussion_id: the Discussion to analyse.
    :param idea_id: if given, restrict to that Idea's posts and compare the
        clusters with the idea's children classification.
    :param num_topics: number of topics for the gensim model.
    :param passes: currently unused — the passes= argument below is
        commented out.  NOTE(review): confirm whether it should be re-enabled.
    :param silhouette_cutoff: discard results whose silhouette score is lower.
    :param algorithm: name of a class in sklearn.cluster.
    :param algo_kwargs: extra keyword args for the clustering algorithm
        ('metric', 'min_samples', 'eps'...).
    :return: (silhouette_score, compare_with_ideas, clusters, post_info),
        or None when there is no model, too few posts, or a poor score.
    """
    metric = algo_kwargs.get('metric', 'cosine')
    if idea_id:
        idea = Idea.get(idea_id)
        discussion = idea.discussion
    else:
        idea = None
        discussion = Discussion.get(discussion_id)
    _, tfidf_model, gensim_model = get_discussion_semantic_analysis(
        discussion_id, num_topics=num_topics,
        # passes=passes)
        model_cls=gmodels.lsimodel.LsiModel)
    if not tfidf_model or not gensim_model:
        return
    # Use the discussion's main locale (language part only) to pick the
    # on-disk NLP data directory and stemmer.
    lang = discussion.discussion_locales[0].split('_')[0]
    dirname = join(nlp_data, lang)
    stemmer = get_stemmer(lang)
    trans = identity
    if not isinstance(stemmer, DummyStemmer):
        # Wrap the stemmer so stems can be mapped back to readable words.
        stemmer = ReversibleStemmer(
            stemmer, join(dirname, STEMS_FNAME))

        def trans(x):
            return stemmer.reverse.get(x, x)
    corpus = IdMmCorpus(join(dirname, CORPUS_FNAME))
    # TODO: Orphans
    if idea:
        post_ids = post_ids_of(idea)
    else:
        post_ids = [x for (x,) in discussion.db.query(
            Content.id).filter_by(discussion_id=discussion_id).all()]
    # Too few posts to cluster meaningfully.
    if len(post_ids) < 10:
        return
    post_id_by_index = {n: post_id for (n, post_id) in enumerate(post_ids)}
    index_by_post_id = {post_id: n for (n, post_id) in enumerate(post_ids)}
    subcorpus = corpus[post_ids]
    tfidf_corpus = tfidf_model[subcorpus]
    if isinstance(gensim_model, gmodels.lsimodel.LsiModel):
        # Weight topics by singular values, normalized to the largest.
        topic_intensities = (gensim_model.projection.s /
                             gensim_model.projection.s[0])
    else:
        topic_intensities = numpy.ones((num_topics,))
    model_matrix = gensimvecs_to_csr(
        gensim_model[tfidf_corpus], num_topics, topic_intensities)
    if 'eps' not in algo_kwargs:
        # This is silly, but approximate eps with optics
        o = Optics(algo_kwargs.get('min_samples', 4), metric)
        o.calculate_distances(model_matrix.todense())
        RD = o.RD
        print "optics result:", RD
        # Take the midpoint of the reachability-distance range as epsilon.
        a, b = min(RD[1:]), max(RD)
        eps = a + (b - a) * 0.5
        print "epsilon", eps
        algo_kwargs['eps'] = eps
    # Resolve the algorithm name to its sklearn.cluster class and build it.
    algorithm = getattr(sklearn.cluster, algorithm)
    algorithm = algorithm(
        metric=metric,
        algorithm=('brute' if metric == 'cosine' else 'auto'),
        **algo_kwargs)
    r = algorithm.fit(model_matrix)
    labels = r.labels_
    n_clusters_raw = len(set(labels))
    # n_clusters_ = n_clusters_raw - (1 if -1 in labels else 0)
    silhouette_score = None
    if n_clusters_raw > 1:
        silhouette_score = metrics.silhouette_score(
            model_matrix, labels, metric=metric)
    # NOTE(review): when only one cluster was found, silhouette_score is
    # still None here; 'None < cutoff' is True under Python 2, so such
    # results are rejected — confirm this is the intended behavior.
    if silhouette_score < silhouette_cutoff:
        return None
    post_clusters = []
    remainder = set(post_ids)
    for label in set(labels):
        if label == -1:
            # -1 is the noise label; those posts stay in the remainder.
            continue
        subset = [n for (n, l) in enumerate(labels) if label == l]
        cluster = [post_id_by_index[n] for n in subset]
        remainder -= set(cluster)
        post_clusters.append(cluster)
    remainder = list(remainder)
    all_cluster_features = calc_features(
        post_ids, post_clusters, corpus, tfidf_model, gensim_model,
        num_topics, topic_intensities, trans)
    if idea:
        # Compare to children classification
        # NOTE(review): children_remainder is unpacked but never used here.
        (
            compare_with_ideas, all_idea_scores, ideas_of_post,
            children_remainder) = compare_with_children(
            idea, post_ids, post_clusters, remainder, labels)
    else:
        compare_with_ideas = ()
        ideas_of_post = defaultdict(tuple)
        all_idea_scores = defaultdict(dict)
    post_text = dict(Content.default_db.query(Content.id, Content.body).all())
    # Per-post info: attached ideas, the cluster label, and the body text.
    post_info = {
        post_id: dict(ideas=ideas_of_post[post_id],
                      cluster_id=labels[index_by_post_id[post_id]],
                      text=post_text[post_id])
        for post_id in post_ids
    }
    clusters = [
        dict(cluster=cluster,
             features=all_cluster_features[n],
             idea_scores=all_idea_scores[n])
        for (n, cluster) in enumerate(post_clusters)
    ]
    # The remainder pseudo-cluster goes last and has no 'features' key.
    clusters.append(dict(cluster=remainder, idea_scores=all_idea_scores[-1]))
    return (silhouette_score, compare_with_ideas, clusters, post_info)
def get_data(content):
    """Return (uid, dict of fields to index) for a model object.

    Handles Idea, AgentProfile, Post (incl. SynthesisPost) and Extract;
    returns (None, None) (or a bare None for non-indexed Idea subclasses'
    fallthrough) when the object should not be indexed.
    """
    from assembl.models import Idea, Post, SynthesisPost, AgentProfile, LangString, Extract, Question
    if type(content) == Idea:  # only index Idea, not Thematic or Question
        data = {}
        for attr in ('creation_date', 'id', 'discussion_id'):
            data[attr] = getattr(content, attr)
        populate_from_langstring_prop(content, data, 'title')
        populate_from_langstring_prop(content, data, 'synthesis_title')
        populate_from_langstring_prop(content, data, 'description')
        announcement = content.get_applicable_announcement()
        if announcement:
            populate_from_langstring_prop(announcement, data, 'title',
                                          'announcement_title')
            populate_from_langstring_prop(announcement, data, 'body',
                                          'announcement_body')
        phase = content.get_associated_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        data['message_view_override'] = content.message_view_override
        return get_uid(content), data
    elif isinstance(content, AgentProfile):
        data = {}
        for attr in ('creation_date', 'id'):
            data[attr] = getattr(content, attr, None)
            # AgentProfile doesn't have creation_date, User does.
        data['name'] = content.display_name()
        # get all discussions that the user is in via AgentStatusInDiscussion
        data['discussion_id'] = set(
            [s.discussion_id for s in content.agent_status_in_discussion])
        # get discussion_id for all posts of this agent
        data['discussion_id'] = list(data['discussion_id'].union(
            [post.discussion_id for post in content.posts_created]))
        return get_uid(content), data
    elif isinstance(content, Post):
        data = {}
        # Elasticsearch parent/child relation to the creator document.
        data['_parent'] = 'user:{}'.format(content.creator_id)
        if content.parent_id is not None:
            data['parent_creator_id'] = content.parent.creator_id
        for attr in ('discussion_id', 'creation_date', 'id', 'parent_id',
                     'creator_id', 'sentiment_counts'):
            data[attr] = getattr(content, attr)
        data['creator_display_name'] = AgentProfile.get(
            content.creator_id).display_name()
        data['sentiment_tags'] = [
            key for key in data['sentiment_counts']
            if data['sentiment_counts'][key] > 0
        ]
        like = data['sentiment_counts']['like']
        disagree = data['sentiment_counts']['disagree']
        dont_understand = data['sentiment_counts']['dont_understand']
        more_info = data['sentiment_counts']['more_info']
        all_sentiments = [like, disagree, dont_understand, more_info]
        data['sentiment_counts']['total'] = sum(all_sentiments)
        data['sentiment_counts']['popularity'] = like - disagree
        # NOTE(review): Python 2 integer division — consensus and controversy
        # are truncated to ints; confirm that is intended.
        data['sentiment_counts']['consensus'] = max(all_sentiments) / (
            (sum(all_sentiments) / len(all_sentiments)) or 1)
        data['sentiment_counts']['controversy'] = max(like, disagree, 1) / min(
            like or 1, disagree or 1)
        data[
            'type'] = content.type  # this is the subtype (assembl_post, email...)
        # data['publishes_synthesis_id'] = getattr(
        #     content, 'publishes_synthesis_id', None)
        phase = content.get_created_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        if isinstance(content, SynthesisPost):
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'subject')
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'introduction')
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'conclusion')
            # Merge the synthesis ideas' titles into one LangString per
            # locale (country variants stripped) and index it as 'ideas'.
            long_titles = [
                idea.synthesis_title
                for idea in content.publishes_synthesis.ideas
                if idea.synthesis_title
            ]
            long_titles_c = defaultdict(list)
            for ls in long_titles:
                for e in ls.entries:
                    if e.value:
                        long_titles_c[strip_country(e.base_locale)].append(
                            e.value)
            ls = LangString()
            for locale, values in long_titles_c.iteritems():
                ls.add_value(' '.join(values), locale)
            populate_from_langstring(ls, data, 'ideas')
        else:
            idea_id = get_idea_id_for_post(content)
            if not idea_id:
                # Post not attached to any idea: don't index it.
                return None, None
            data['idea_id'] = idea_id
            related_idea = Idea.get(idea_id[0])
            # NOTE(review): here a Question is replaced by its parent BEFORE
            # reading message_view_override, but the Extract branch below
            # reads it before replacing — confirm which order is intended.
            if isinstance(related_idea, Question):
                related_idea = related_idea.parents[0]
            data['message_view_override'] = related_idea.message_view_override
            # we take the title of the first idea in the list for now (in v2, posts are attached to only one idea)
            populate_from_langstring_prop(related_idea, data, 'title',
                                          'idea_title')
        populate_from_langstring_prop(content, data, 'body')
        populate_from_langstring_prop(content, data, 'subject')
        return get_uid(content), data
    elif isinstance(content, Extract):
        data = {}
        for attr in ('discussion_id', 'body', 'creation_date', 'id',
                     'creator_id'):
            data[attr] = getattr(content, attr)
        data['post_id'] = content.content_id
        post = Post.get(content.content_id)
        populate_from_langstring_prop(post, data, 'subject')
        phase = post.get_created_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        idea_id = get_idea_id_for_post(post)
        if not idea_id:
            # The extracted post is not attached to any idea: don't index.
            return None, None
        data['idea_id'] = idea_id
        # we take the title of the first idea in the list for now (in v2, posts are attached to only one idea)
        related_idea = Idea.get(idea_id[0])
        data['message_view_override'] = related_idea.message_view_override
        if isinstance(related_idea, Question):
            related_idea = related_idea.parents[0]
        populate_from_langstring_prop(related_idea, data, 'title',
                                      'idea_title')
        data['extract_state'] = 'taxonomy_state.' + content.extract_state
        if content.extract_nature:
            data[
                'extract_nature'] = 'taxonomy_nature.' + content.extract_nature.name
        if content.extract_action:
            data[
                'extract_action'] = 'taxonomy_action.' + content.extract_action.name
        data['creator_display_name'] = AgentProfile.get(
            content.creator_id).display_name()
        return get_uid(content), data
    return None, None
def get_data(content):
    """Return (uid, dict of fields to index) for a model object.

    Handles Idea, AgentProfile, Post (incl. SynthesisPost) and Extract;
    returns (None, None) when the object should not be indexed.
    """
    from assembl.models import Idea, Post, SynthesisPost, AgentProfile, LangString, Extract, Question
    if type(content) == Idea:  # only index Idea, not Thematic or Question
        data = {}
        for attr in ('creation_date', 'id', 'discussion_id'):
            data[attr] = getattr(content, attr)
        populate_from_langstring_prop(content, data, 'title')
        populate_from_langstring_prop(content, data, 'synthesis_title')
        populate_from_langstring_prop(content, data, 'description')
        announcement = content.get_applicable_announcement()
        if announcement:
            populate_from_langstring_prop(announcement, data, 'title',
                                          'announcement_title')
            populate_from_langstring_prop(announcement, data, 'body',
                                          'announcement_body')
        phase = content.get_associated_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        data['message_view_override'] = content.message_view_override
        return get_uid(content), data
    elif isinstance(content, AgentProfile):
        data = {}
        # 'name' is read with a default too; presumably a plain column here,
        # not display_name() — verify against the AgentProfile model.
        for attr in ('creation_date', 'id', 'name'):
            data[attr] = getattr(content, attr, None)
            # AgentProfile doesn't have creation_date, User does.
        # get all discussions that the user is in via AgentStatusInDiscussion
        data['discussion_id'] = set([s.discussion_id
                                     for s in content.agent_status_in_discussion])
        # get discussion_id for all posts of this agent
        data['discussion_id'] = list(
            data['discussion_id'].union(
                [post.discussion_id for post in content.posts_created]
            )
        )
        return get_uid(content), data
    elif isinstance(content, Post):
        data = {}
        # Elasticsearch parent/child relation to the creator document.
        data['_parent'] = 'user:{}'.format(content.creator_id)
        if content.parent_id is not None:
            data['parent_creator_id'] = content.parent.creator_id
        for attr in ('discussion_id', 'creation_date', 'id', 'parent_id',
                     'creator_id', 'sentiment_counts'):
            data[attr] = getattr(content, attr)
        data['creator_display_name'] = AgentProfile.get(
            content.creator_id).display_name()
        data['sentiment_tags'] = [key for key in data['sentiment_counts']
                                  if data['sentiment_counts'][key] > 0]
        like = data['sentiment_counts']['like']
        disagree = data['sentiment_counts']['disagree']
        dont_understand = data['sentiment_counts']['dont_understand']
        more_info = data['sentiment_counts']['more_info']
        all_sentiments = [like, disagree, dont_understand, more_info]
        data['sentiment_counts']['total'] = sum(all_sentiments)
        data['sentiment_counts']['popularity'] = like - disagree
        # NOTE(review): Python 2 integer division — consensus and controversy
        # are truncated to ints; confirm that is intended.
        data['sentiment_counts']['consensus'] = max(all_sentiments) / (
            (sum(all_sentiments) / len(all_sentiments)) or 1)
        data['sentiment_counts']['controversy'] = max(like, disagree, 1) / min(
            like or 1, disagree or 1)
        data['type'] = content.type  # this is the subtype (assembl_post, email...)
        # data['publishes_synthesis_id'] = getattr(
        #     content, 'publishes_synthesis_id', None)
        phase = content.get_created_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        if isinstance(content, SynthesisPost):
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'subject')
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'introduction')
            populate_from_langstring_prop(content.publishes_synthesis,
                                          data, 'conclusion')
            # Merge the synthesis ideas' titles into one LangString per
            # locale (country variants stripped) and index it as 'ideas'.
            long_titles = [idea.synthesis_title
                           for idea in content.publishes_synthesis.ideas
                           if idea.synthesis_title]
            long_titles_c = defaultdict(list)
            for ls in long_titles:
                for e in ls.entries:
                    if e.value:
                        long_titles_c[strip_country(e.base_locale)].append(
                            e.value)
            ls = LangString()
            for locale, values in long_titles_c.iteritems():
                ls.add_value(' '.join(values), locale)
            populate_from_langstring(ls, data, 'ideas')
        else:
            idea_id = get_idea_id_for_post(content)
            if not idea_id:
                # Post not attached to any idea: don't index it.
                return None, None
            data['idea_id'] = idea_id
            related_idea = Idea.get(idea_id[0])
            # Read the override from the idea itself, then fall back to the
            # Question's parent idea for the title below.
            data['message_view_override'] = related_idea.message_view_override
            if isinstance(related_idea, Question):
                related_idea = related_idea.parents[0]
            # we take the title of the first idea in the list for now (in v2, posts are attached to only one idea)
            populate_from_langstring_prop(
                related_idea, data, 'title', 'idea_title')
        populate_from_langstring_prop(content, data, 'body')
        populate_from_langstring_prop(content, data, 'subject')
        return get_uid(content), data
    elif isinstance(content, Extract):
        data = {}
        for attr in ('discussion_id', 'body', 'creation_date', 'id',
                     'creator_id'):
            data[attr] = getattr(content, attr)
        data['post_id'] = content.content_id
        post = Post.get(content.content_id)
        populate_from_langstring_prop(post, data, 'subject')
        phase = post.get_created_phase()
        if phase:
            data['phase_id'] = phase.id
            data['phase_identifier'] = phase.identifier
        idea_id = get_idea_id_for_post(post)
        if not idea_id:
            # The extracted post is not attached to any idea: don't index.
            return None, None
        data['idea_id'] = idea_id
        # we take the title of the first idea in the list for now (in v2, posts are attached to only one idea)
        related_idea = Idea.get(idea_id[0])
        data['message_view_override'] = related_idea.message_view_override
        if isinstance(related_idea, Question):
            related_idea = related_idea.parents[0]
        populate_from_langstring_prop(
            related_idea, data, 'title', 'idea_title')
        data['extract_state'] = 'taxonomy_state.' + content.extract_state
        if content.extract_nature:
            data['extract_nature'] = 'taxonomy_nature.' + content.extract_nature.name
        if content.extract_action:
            data['extract_action'] = 'taxonomy_action.' + content.extract_action.name
        data['creator_display_name'] = AgentProfile.get(
            content.creator_id).display_name()
        return get_uid(content), data
    return None, None