예제 #1
0
파일: tasks.py 프로젝트: nostalgiaz/my_info
def create_info_page_task(username, elaboration_id, url):
    """Build the elaboration for ``username`` and e-mail the result link.

    Steps, in order:

    1. refresh (or create) the ``UserInfo`` row from the data returned by
       ``twitter.show_user``;
    2. annotate and cluster through ``AffinityPropagationClusterify``,
       storing both results on the ``Elaboration``;
    3. save the elaboration and send a plain-text + HTML message that links
       to ``url``.

    Progress is published in the cache under ``<elaboration_id>:step`` with
    the values 1 to 4 (4 being the exit code).
    """
    elaboration = Elaboration(elaboration_id=elaboration_id)
    redis = RedisCache()
    twitter = get_twitter_from_username(username)
    profile = twitter.show_user(screen_name=username)
    user = User.objects.get(username=username)

    # ------------------------------------------------------------------
    # Personal information
    # ------------------------------------------------------------------
    try:
        user_info = UserInfo.objects.get(user=user)
    except UserInfo.DoesNotExist:
        # First elaboration for this user: start from an empty record.
        user_info = UserInfo()

    user_info.user = user
    user_info.full_name = profile['name']
    user_info.nick = profile['screen_name']
    user_info.bio = profile['description']
    # Dropping the "_normal" marker from the image URL.
    user_info.image = profile['profile_image_url'].replace('_normal', '')
    user_info.save()

    # ------------------------------------------------------------------
    # Cluster & tweets
    # ------------------------------------------------------------------
    # Smaller value when only a few tweets are fetched (debug setup).
    k = 10 if NUMBER_OF_TWEETS <= 20 else 50

    step_key = '{}:step'.format(elaboration_id)

    redis.set(step_key, 1)
    # Alternatives that were tried here: KMeansClusterify(reader, k),
    # SpectralClusterify(reader, k), StarClusterify(reader).
    clusterify = AffinityPropagationClusterify(
        TwitterProfileReader(username), k
    )

    redis.set(step_key, 2)
    elaboration.tweets = clusterify.annotate()

    redis.set(step_key, 3)
    elaboration.cluster = clusterify.do_cluster()

    logger.info(elaboration.cluster)

    redis.set(step_key, 4)  # exit code

    elaboration.user = user
    elaboration.save()

    # ------------------------------------------------------------------
    # Notification e-mail
    # ------------------------------------------------------------------
    subject = "Work done!"
    recipient = user.email
    text_content = "Checkout the final elaboration here: {}".format(url)
    html_template = (
        '<p>Checkout the final elaboration <a href="{}">'
        'here</a></p>'
    )
    html_content = html_template.format(url)

    msg = EmailMultiAlternatives(
        subject, text_content, EMAIL_HOST_USER, [recipient]
    )
    msg.attach_alternative(html_content, "text/html")
    msg.send()
예제 #2
0
파일: views.py 프로젝트: nostalgiaz/my_info
def start_elaboration(request):
    """Queue a new elaboration for ``request.user`` and redirect to it.

    A fresh elaboration id is derived from the username and the current
    time, its ``<id>:step`` cache entry is initialised to 0, and
    ``create_info_page_task`` is queued with the absolute URL of the
    ``show_info_page`` view for that id.

    Returns an ``HttpResponseRedirect`` to the ``elaboration`` view of the
    new id.
    """
    user = request.user
    redis = RedisCache()

    # sha1 works on bytes. Encoding explicitly avoids the implicit ASCII
    # conversion, which fails on usernames with non-ASCII characters; the
    # digest is unchanged for ASCII usernames.
    seed = (user.username + str(datetime.now())).encode('utf-8')
    elaboration_id = sha1(seed).hexdigest()

    redis.set('{}:step'.format(elaboration_id), 0)

    create_info_page_task.delay(
        user.username, elaboration_id, request.build_absolute_uri(
            reverse('show_info_page', args=[elaboration_id])
        )
    )

    return HttpResponseRedirect(
        reverse('elaboration', args=[elaboration_id])
    )
예제 #3
0
class InterWikiRecon(object):
    """Resolve a Wikipedia page URL to its Italian/English counterpart.

    Lookups go through the interwikirecon reconcile service and are stored
    in the cache under ``<page>:interwikirecon``.
    """

    def __init__(self):
        self.cache = RedisCache()
        self.requests = requests.session()

    def get_inter_wikilinks(self, page):
        """
        Return ``{'IT': url_or_None, 'EN': url_or_None}`` for ``page``.

        The language of ``page`` is ``IT`` when its URL contains ``://it.``
        and ``EN`` otherwise; the other language is looked up remotely and
        is ``None`` when the service returns an empty result list. The
        answer (including a ``None`` lookup) is cached.

        >>> a = InterWikiRecon()
        >>> a.get_inter_wikilinks('http://it.wikipedia.org/wiki/Mozilla')
        {u'EN': u'http://en.wikipedia.org/wiki/Mozilla', \
u'IT': u'http://it.wikipedia.org/wiki/Mozilla'}
        >>> a.get_inter_wikilinks('http://en.wikipedia.org/wiki/Mozilla')
        {u'EN': u'http://en.wikipedia.org/wiki/Mozilla', \
u'IT': u'http://it.wikipedia.org/wiki/Mozilla'}
        """
        # Imported locally so this block does not rely on a file-level import.
        import json

        key = "{}:interwikirecon".format(page)
        if not self.cache.has(key):

            given_lang = 'IT' if '://it.' in page else 'EN'
            new_lang = 'IT' if given_lang == 'EN' else 'EN'

            # Serialise the query with json.dumps and let requests URL-encode
            # it: interpolating ``page`` by hand produced a broken request
            # for pages containing characters such as '&', '#', '%' or '"'.
            queries = json.dumps({
                'q0': {'query': page, 'type': 'Wikipedia %s' % new_lang},
            })

            try:
                new_page = self.requests.get(
                    'http://interwikirecon.spaziodati.eu/reconcile',
                    params={'queries': queries},
                ).json()['q0']['result'][0]['id']
            except IndexError:
                # Empty result list: no counterpart in the other language.
                new_page = None

            self.cache.set(key, {
                given_lang: page,
                new_lang: new_page,
            })

        return self.cache.get(key)
예제 #4
0
class DataTXT(object):
    """Cached access to the dataTXT annotation and relatedness services.

    ``nex`` annotates a text through the ``datatxt`` client; ``rel`` builds
    a relatedness matrix between two lists of topic URIs, asking the REST
    endpoint for both the Italian and the English variant of every topic
    and keeping the higher weight.
    """

    def __init__(self):
        self.interWikiRecon = InterWikiRecon()
        self.cache = RedisCache()
        self.requests = requests.session()
        self.datatxt = datatxt.DataTXT(
            app_id=DATATXT_APP_ID,
            app_key=DATATXT_APP_KEY,
        )

    def nex(self, *args):
        """Annotate the text passed as first positional argument.

        Returns a dict with ``id`` (sha1 hex digest of language plus
        annotations), ``lang`` and ``annotations`` (annotation objects keyed
        by their ``uri``). When the client raises ``DandelionException`` the
        failure is logged and ``None`` is returned.
        """
        try:
            annotated = self.datatxt.nex(
                args[0],
                min_confidence=.6,
                parse_hashtag=True,
                epsilon=.5,
            )
            # sha1 needs bytes; encode explicitly instead of relying on an
            # implicit ASCII conversion of the text.
            fingerprint = annotated.lang + str(annotated.annotations)
            id_ = sha1(fingerprint.encode('utf-8')).hexdigest()

            return {
                'id': id_,
                'lang': annotated.lang,
                'annotations': {
                    a.uri: a for a in annotated.annotations
                }
            }
        except DandelionException:
            logger.info("dandelion exception: %s", args[0])
            return None

    def _rel_request(self, lang, topic1, topic2):
        """Query the relatedness endpoint for two topic lists in ``lang``.

        Returns the decoded JSON body, or ``{}`` when either list holds no
        usable topic (empty or only ``None``) or the response is not ``ok``.
        """
        if all(x is None for x in topic1) or all(x is None for x in topic2):
            return {}

        url = 'http://api.dandelion.eu/datatxt/rel/v1'
        response = self.requests.get(url, params={
            'lang': lang,
            'topic1': topic1,
            'topic2': topic2,
            '$app_id': DATATXT_APP_ID,
            '$app_key': DATATXT_APP_KEY,
        })

        if response.ok:
            return response.json()

        # Log the raw body: an error response is not guaranteed to be JSON,
        # so decoding it here could raise instead of degrading to {}.
        logger.warning(
            "datatxt rel request failed (%s): %s",
            response.status_code, response.text,
        )
        return {}

    def rel(self, topics1, topics2, enable_cache=True):
        """Return the ``len(topics1) x len(topics2)`` relatedness matrix.

        Each pair is read from the cache key ``<topic1>-<topic2>:relatedness``
        when ``enable_cache`` is true and the key exists. On the first miss
        the whole matrix is computed once through ``_rel`` and every missed
        pair is written back to the cache (also when ``enable_cache`` is
        false, which only disables reading).
        """
        matrix = zeros((len(topics1), len(topics2)))

        computed = None  # filled lazily by a single _rel() call
        for i, topic1 in enumerate(topics1):
            for j, topic2 in enumerate(topics2):
                cache_key = "{}-{}:relatedness".format(topic1, topic2)
                if enable_cache and self.cache.has(cache_key):
                    value = self.cache.get(cache_key)
                else:
                    if computed is None:
                        computed = self._rel(topics1, topics2)
                    value = computed[i][j]
                    self.cache.set(cache_key, value)

                matrix[i][j] = value
        return matrix

    def _rel_weights(self, lang, topics1, topics2):
        """Return ``{sorted (uri, uri) pair: weight}`` for one language.

        Entries of the response flagged with ``error`` are skipped.
        """
        payload = self._rel_request(lang, topics1, topics2)
        return {
            tuple(sorted([
                x['topic1']['topic']['uri'],
                x['topic2']['topic']['uri'],
            ])): x['weight']
            for x in payload.get('relatedness', [])
            if not x.get('error', False)
        }

    @staticmethod
    def _pair_weight(weights, topic1, topic2):
        """Look up the weight of an unordered topic pair, defaulting to 0."""
        # A topic without a page in this language can never match a returned
        # URI; answering 0 directly also avoids ordering None against text.
        if topic1 is None or topic2 is None:
            return 0.
        return weights.get(tuple(sorted([topic1, topic2])), 0.)

    def _rel(self, topics1, topics2):
        """Compute the full relatedness matrix without touching the cache.

        Every topic is mapped to its Italian and English URI (``None`` when
        the inter-wiki lookup has no counterpart), both languages are
        queried, and each cell holds the larger of the two weights.
        """

        def lang_topic(topics, wanted_lang):
            for topic in topics:
                lang = 'it' if '://it.' in topic else 'en'
                if lang == wanted_lang:
                    yield topic
                else:
                    pages = self.interWikiRecon.get_inter_wikilinks(topic)
                    yield pages.get(wanted_lang.upper())

        it_topics1 = list(lang_topic(topics1, 'it'))
        it_topics2 = list(lang_topic(topics2, 'it'))
        en_topics1 = list(lang_topic(topics1, 'en'))
        en_topics2 = list(lang_topic(topics2, 'en'))

        it_weights = self._rel_weights('it', it_topics1, it_topics2)
        en_weights = self._rel_weights('en', en_topics1, en_topics2)

        response = zeros((len(topics1), len(topics2)))
        for i, (it_topic1, en_topic1) in enumerate(zip(it_topics1, en_topics1)):
            for j, (it_topic2, en_topic2) in enumerate(zip(it_topics2, en_topics2)):
                response[i][j] = max(
                    self._pair_weight(it_weights, it_topic1, it_topic2),
                    self._pair_weight(en_weights, en_topic1, en_topic2),
                )
        return response