Beispiel #1
0
    def getRelatedTags(tags):
        """ fetches the related tags for the given tags
            @param tags: a single search term (str) or a list of tags; a
                         list is joined with "+" to build the query
            @return dictionary mapping every lower-cased tag found in the
                    search results to the number of times it occurred
        """
        # local import so that this method does not depend on the module's
        # import block
        import json

        if isinstance(tags, str):
            query = tags
        else:
            query = "+".join(tags)
        url = Twitter.TWITTER_SEARCH_URL % query

        f = Retrieve(Twitter.__name__).open(url)
        try:
            raw = f.read()
        finally:
            # always release the handle, even if reading fails
            f.close()

        # Parse the response as JSON rather than eval()-ing it: eval executes
        # whatever the remote side sends, fails on the JSON literals
        # true/false, and the former textual null replacement also rewrote
        # the word "null" inside tweet texts.
        search_results = json.loads(raw)

        related_tags = {}
        for result in search_results['results']:
            # JSON null is decoded to None; treat it like an empty text
            text = result['text'] or ""
            for tag in Twitter.RE_FIND_TAGS.findall(text):
                tag = tag.lower()
                related_tags[tag] = related_tags.get(tag, 0) + 1

        # todo: sort

        return related_tags
Beispiel #2
0
    def getRelatedTags(tags):
        """ fetches the related tags for the given tags
            @param tags: a single search term (str) or a list of tags; a
                         list is joined with "+" to build the query
            @return dictionary mapping every lower-cased tag found in the
                    search results to the number of times it occurred
        """
        # local import so that this method does not depend on the module's
        # import block
        import json

        if isinstance(tags, str):
            query = tags
        else:
            query = "+".join(tags)
        url = Twitter.TWITTER_SEARCH_URL % query

        f = Retrieve(Twitter.__name__).open(url)
        try:
            raw = f.read()
        finally:
            # always release the handle, even if reading fails
            f.close()

        # Parse the response as JSON rather than eval()-ing it: eval executes
        # whatever the remote side sends, fails on the JSON literals
        # true/false, and the former textual null replacement also rewrote
        # the word "null" inside tweet texts.
        search_results = json.loads(raw)

        related_tags = {}
        for result in search_results['results']:
            # JSON null is decoded to None; treat it like an empty text
            text = result['text'] or ""
            for tag in Twitter.RE_FIND_TAGS.findall(text):
                tag = tag.lower()
                related_tags[tag] = related_tags.get(tag, 0) + 1

        # todo: sort

        return related_tags
Beispiel #3
0
    def get_content(url):
        """ returns the content from Flickr
            @param url: the URL to retrieve; must start with "http"
            @return whatever read() yields for the opened URL
        """
        # NOTE: asserts are removed when Python runs with -O
        assert(url.startswith("http"))

        f = Retrieve(Flickr.__name__).open(url)
        try:
            return f.read()
        finally:
            # close the handle even if read() raises
            f.close()
Beispiel #4
0
    def get_content(url):
        """ returns the content from Flickr
            @param url: the URL to retrieve; must start with "http"
            @return whatever read() yields for the opened URL
        """
        # NOTE: asserts are removed when Python runs with -O
        assert (url.startswith("http"))

        f = Retrieve(Flickr.__name__).open(url)
        try:
            return f.read()
        finally:
            # close the handle even if read() raises
            f.close()
Beispiel #5
0
    def testRetrievalTimeout(self):
        ''' tests whether the socket timeout is honored by our class

            opens SLOW_URL with a 0.1 second default timeout and expects
            urllib2.URLError to be raised while opening or reading it.
        '''
        SLOW_URL = "http://www.csse.uwa.edu.au/"

        with raises(urllib2.URLError):
            r = Retrieve(self.__class__.__name__,
                         default_timeout=0.1).open(SLOW_URL)
            try:
                # the data itself is irrelevant; only the timeout matters
                r.read()
            finally:
                # release the handle even when read() raises as expected
                r.close()
Beispiel #6
0
    def _get_content(url):
        """ returns the content from delicious
            @param url: the URL to retrieve; must start with "http"
            @return whatever read() yields for the opened URL

            sleeps for one second after a successful retrieval.
        """
        # NOTE: asserts are removed when Python runs with -O
        assert(url.startswith("http"))

        f = Retrieve(Delicious.__name__).open(url)
        try:
            content = f.read()
        finally:
            # close the handle even if read() raises
            f.close()
        sleep(1)
        return content
Beispiel #7
0
    def _get_content(url):
        """ returns the content from delicious
            @param url: the URL to retrieve; must start with "http"
            @return whatever read() yields for the opened URL

            sleeps for one second after a successful retrieval.
        """
        # NOTE: asserts are removed when Python runs with -O
        assert(url.startswith("http"))

        f = Retrieve(Delicious.__name__).open(url)
        try:
            content = f.read()
        finally:
            # close the handle even if read() raises
            f.close()
        sleep(1)
        return content
Beispiel #8
0
def t_retrieve(url):
    ''' fetches the given url from the web and returns what was read

        @remarks
        helper used by the testMultiProcessing unit test.
    '''
    handle = Retrieve(__name__).open(url)
    try:
        return handle.read()
    finally:
        # explicit close: GzipFile cannot be used in a with-statement
        # under python 2.6
        handle.close()
Beispiel #9
0
    def get_content(url):
        """ returns the content from Technorati
            @param url: the URL to retrieve; must start with "http"
            @return whatever read() yields for the opened URL

            if the previous access recorded in Technorati.last_access is
            less than SLEEP_TIME seconds old, the call first sleeps for
            SLEEP_TIME seconds; last_access is then updated.
        """
        # NOTE: asserts are removed when Python runs with -O
        assert(url.startswith("http"))

        # pass the arguments to the logger so the message is only formatted
        # when debug logging is enabled
        logger.debug('Fetching content for URL %s', url)

        if (time.time() - Technorati.last_access) < SLEEP_TIME:
            logger.debug('Sleeping %s seconds!', SLEEP_TIME)
            time.sleep(SLEEP_TIME)

        Technorati.last_access = time.time()

        f = Retrieve("%s_new" % Technorati.__name__).open(url)
        try:
            return f.read()
        finally:
            # close the handle even if read() raises
            f.close()