import json
import logging
from datetime import datetime
from hashlib import sha1

import requests
from celery import shared_task
from django.core.mail import EmailMultiAlternatives
from django.core.urlresolvers import reverse
from django.http import HttpResponseRedirect
from numpy import zeros

# Project-local names used below (Elaboration, RedisCache, User, UserInfo,
# get_twitter_from_username, TwitterProfileReader, the *Clusterify classes,
# the `datatxt` client module, DandelionException, and settings such as
# NUMBER_OF_TWEETS, EMAIL_HOST_USER, DATATXT_APP_ID and DATATXT_APP_KEY) are
# assumed to be imported from elsewhere in the project.

logger = logging.getLogger(__name__)


@shared_task  # Celery task: invoked asynchronously via .delay() below
def create_info_page_task(username, elaboration_id, url):
    elaboration = Elaboration(elaboration_id=elaboration_id)
    redis = RedisCache()
    twitter = get_twitter_from_username(username)
    info = twitter.show_user(screen_name=username)
    user = User.objects.get(username=username)

    ###########################################################################
    # PERSONAL INFORMATION
    ###########################################################################
    try:
        user_info = UserInfo.objects.get(user=user)
    except UserInfo.DoesNotExist:
        user_info = UserInfo()
        user_info.user = user
    user_info.full_name = info['name']
    user_info.nick = info['screen_name']
    user_info.bio = info['description']
    user_info.image = info['profile_image_url'].replace('_normal', '')
    user_info.save()

    ###########################################################################
    # CLUSTER & TWEETS
    ###########################################################################
    k = 50
    if NUMBER_OF_TWEETS <= 20:  # debug
        k = 10
    redis.set('{}:step'.format(elaboration_id), 1)
    # clusterify = KMeansClusterify(TwitterProfileReader(username), k)
    clusterify = AffinityPropagationClusterify(TwitterProfileReader(username), k)
    # clusterify = SpectralClusterify(TwitterProfileReader(username), k)
    # clusterify = StarClusterify(TwitterProfileReader(username))
    redis.set('{}:step'.format(elaboration_id), 2)
    elaboration.tweets = clusterify.annotate()
    redis.set('{}:step'.format(elaboration_id), 3)
    elaboration.cluster = clusterify.do_cluster()
    logger.info(elaboration.cluster)
    redis.set('{}:step'.format(elaboration_id), 4)

    # exit code: persist the elaboration and notify the user by email
    elaboration.user = user
    elaboration.save()
    subject, to = "Work done!", user.email
    text_content = "Check out the final elaboration here: {}".format(url)
    html_content = '<p>Check out the final elaboration <a href="{}">' \
                   'here</a></p>'.format(url)
    msg = EmailMultiAlternatives(subject, text_content, EMAIL_HOST_USER, [to])
    msg.attach_alternative(html_content, "text/html")
    msg.send()
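
# `RedisCache` is a project-local helper; the code in this module relies on
# exactly three of its methods: has(), get() and set(). A minimal stand-in
# with that interface is sketched below -- the redis client and the pickle
# serialization are assumptions for illustration, not the project's actual
# implementation.
import pickle  # sketch-only dependency
import redis as redis_client  # sketch-only dependency


class RedisCacheSketch(object):
    def __init__(self, host='localhost', port=6379, db=0):
        self.client = redis_client.StrictRedis(host=host, port=port, db=db)

    def has(self, key):
        # true when the key exists in redis
        return self.client.exists(key)

    def get(self, key):
        value = self.client.get(key)
        return pickle.loads(value) if value is not None else None

    def set(self, key, value):
        # values are pickled so arbitrary Python objects can be cached
        self.client.set(key, pickle.dumps(value))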
def start_elaboration(request):
    user = request.user
    redis = RedisCache()
    elaboration_id = sha1(user.username + str(datetime.now())).hexdigest()
    redis.set('{}:step'.format(elaboration_id), 0)
    create_info_page_task.delay(
        user.username,
        elaboration_id,
        request.build_absolute_uri(
            reverse('show_info_page', args=[elaboration_id])
        )
    )
    return HttpResponseRedirect(
        reverse('elaboration', args=[elaboration_id])
    )
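
# A hypothetical companion view (a sketch, not part of the original code):
# since create_info_page_task advances the '{elaboration_id}:step' counter
# from 0 to 4, the elaboration page could poll a small JSON endpoint like
# this one while the Celery task runs. The view name and its URL wiring are
# assumptions for illustration.
def elaboration_progress(request, elaboration_id):
    from django.http import HttpResponse

    step = RedisCache().get('{}:step'.format(elaboration_id))
    return HttpResponse(
        json.dumps({'step': step, 'total': 4}),
        content_type='application/json',
    )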
class InterWikiRecon(object):
    def __init__(self):
        self.cache = RedisCache()
        self.requests = requests.session()

    def get_inter_wikilinks(self, page):
        """
        >>> a = InterWikiRecon()
        >>> a.get_inter_wikilinks('http://it.wikipedia.org/wiki/Mozilla')
        {u'EN': u'http://en.wikipedia.org/wiki/Mozilla', \
u'IT': u'http://it.wikipedia.org/wiki/Mozilla'}
        >>> a.get_inter_wikilinks('http://en.wikipedia.org/wiki/Mozilla')
        {u'EN': u'http://en.wikipedia.org/wiki/Mozilla', \
u'IT': u'http://it.wikipedia.org/wiki/Mozilla'}
        """
        key = "{}:interwikirecon".format(page)
        if not self.cache.has(key):
            given_lang = 'IT' if '://it.' in page else 'EN'
            new_lang = 'IT' if given_lang == 'EN' else 'EN'
            # Refine-style reconciliation query; building it with params and
            # json.dumps (instead of interpolating raw JSON into the URL, as
            # the original did) lets requests handle the URL encoding
            params = {'queries': json.dumps({
                'q0': {'query': page, 'type': 'Wikipedia %s' % new_lang},
            })}
            try:
                response = self.requests.get(
                    'http://interwikirecon.spaziodati.eu/reconcile',
                    params=params,
                )
                new_page = response.json()['q0']['result'][0]['id']
            except IndexError:
                # no candidate page found in the other language
                new_page = None
            self.cache.set(key, {
                given_lang: page,
                new_lang: new_page,
            })
        return self.cache.get(key)
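
# For reference, get_inter_wikilinks() above assumes a Refine-style
# reconciliation payload shaped like the literal below (the values are
# illustrative, not captured from the live service): the
# ['q0']['result'][0]['id'] lookup takes the top-ranked candidate, and the
# IndexError branch fires when 'result' comes back empty.
EXAMPLE_RECON_RESPONSE = {
    'q0': {
        'result': [
            {'id': 'http://en.wikipedia.org/wiki/Mozilla', 'score': 1.0},
        ],
    },
}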
class DataTXT(object):
    def __init__(self):
        self.interWikiRecon = InterWikiRecon()
        self.cache = RedisCache()
        self.requests = requests.session()
        self.datatxt = datatxt.DataTXT(
            app_id=DATATXT_APP_ID,
            app_key=DATATXT_APP_KEY,
        )

    def nex(self, text):
        # entity extraction; returns None when Dandelion rejects the text
        try:
            annotated = self.datatxt.nex(
                text,
                min_confidence=.6,
                parse_hashtag=True,
                epsilon=.5,
            )
            id_ = sha1(annotated.lang + str(annotated.annotations)).hexdigest()
            return {
                'id': id_,
                'lang': annotated.lang,
                'annotations': {a.uri: a for a in annotated.annotations},
            }
        except DandelionException:
            logger.info("dandelion exception: " + text)

    def _rel_request(self, lang, topic1, topic2):
        if all(x is None for x in topic1) or all(x is None for x in topic2):
            return {}
        url = 'http://api.dandelion.eu/datatxt/rel/v1'
        response = self.requests.get(url, params={
            'lang': lang,
            'topic1': topic1,
            'topic2': topic2,
            '$app_id': DATATXT_APP_ID,
            '$app_key': DATATXT_APP_KEY,
        })
        if response.ok:
            return response.json()
        logger.error("rel request failed: %s", response.json())
        return {}

    def rel(self, topics1, topics2, enable_cache=True):
        rel = zeros((len(topics1), len(topics2)))
        response = None
        for i, topic1 in enumerate(topics1):
            for j, topic2 in enumerate(topics2):
                cache_key = "{}-{}:relatedness".format(topic1, topic2)
                if enable_cache and self.cache.has(cache_key):
                    value = self.cache.get(cache_key)
                else:
                    if response is None:
                        # compute the whole matrix once, on the first miss
                        response = self._rel(topics1, topics2)
                    value = response[i][j]
                    self.cache.set(cache_key, value)
                rel[i][j] = value
        return rel

    def _rel(self, topics1, topics2):
        def lang_topic(topics, wanted_lang):
            # yield each topic's page in wanted_lang, going through the
            # interwiki reconciler when the topic is in the other language
            for topic in topics:
                lang = 'it' if '://it.' in topic else 'en'
                if lang == wanted_lang:
                    yield topic
                else:
                    pages = self.interWikiRecon.get_inter_wikilinks(topic)
                    yield pages.get(wanted_lang.upper())

        it_topics1 = list(lang_topic(topics1, 'it'))
        it_topics2 = list(lang_topic(topics2, 'it'))
        en_topics1 = list(lang_topic(topics1, 'en'))
        en_topics2 = list(lang_topic(topics2, 'en'))

        it_response = {
            tuple(sorted([x['topic1']['topic']['uri'],
                          x['topic2']['topic']['uri']])): x['weight']
            for x in self._rel_request(
                'it', it_topics1, it_topics2).get('relatedness', [])
            if not x.get('error', False)
        }
        en_response = {
            tuple(sorted([x['topic1']['topic']['uri'],
                          x['topic2']['topic']['uri']])): x['weight']
            for x in self._rel_request(
                'en', en_topics1, en_topics2).get('relatedness', [])
            if not x.get('error', False)
        }

        response = zeros((len(topics1), len(topics2)))
        for i, (it_topic1, en_topic1) in enumerate(zip(it_topics1, en_topics1)):
            for j, (it_topic2, en_topic2) in enumerate(zip(it_topics2,
                                                           en_topics2)):
                value_it = it_response.get(
                    tuple(sorted([it_topic1, it_topic2])), 0.
                )
                value_en = en_response.get(
                    tuple(sorted([en_topic1, en_topic2])), 0.
                )
                # keep the best score between the two language-specific calls
                response[i][j] = max(value_it, value_en)
        return response
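
# Minimal usage sketch for DataTXT.rel() (the topic URLs are illustrative and
# valid Dandelion credentials are required): the returned numpy matrix holds
# in cell [i][j] the relatedness of topics1[i] and topics2[j], taking the
# best score between the Italian and English rel calls.
if __name__ == '__main__':
    client = DataTXT()
    matrix = client.rel(
        ['http://en.wikipedia.org/wiki/Mozilla'],
        ['http://en.wikipedia.org/wiki/Firefox',
         'http://it.wikipedia.org/wiki/Linux'],
    )
    print(matrix.shape)  # -> (1, 2)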