def getRelatedTags(tags):
    """ fetches the related tags for the given tags
    @param tags: a single tag (str) or a list of tags
    @return: dictionary mapping each related tag to its count
    """
    if isinstance(tags, str):
        url = Twitter.TWITTER_SEARCH_URL % tags
    else:
        url = Twitter.TWITTER_SEARCH_URL % "+".join(tags)
    f = Retrieve(Twitter.__name__).open(url)

    # parse the json response; json.loads is safer than eval'ing the
    # (untrusted) response body (requires 'import json')
    search_results = json.loads(f.read())
    f.close()

    found_tags = []
    related_tags = {}
    for result in search_results['results']:
        # guard against null text fields, which json.loads maps to None
        found_tags.extend(Twitter.RE_FIND_TAGS.findall(result['text'] or ''))

    for tag in found_tags:
        related_tags[tag.lower()] = related_tags.get(tag.lower(), 0) + 1

    # todo: sort (see the sketch below)
    return related_tags
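# A minimal sketch of the ordering left open by the "todo: sort" note in
# getRelatedTags above; the helper name sort_related_tags is hypothetical
# and not part of the original module. It orders the returned tag counts,
# most frequent first.
def sort_related_tags(related_tags):
    """ @param related_tags: dict mapping tags to counts
        @return: list of (tag, count) tuples, most frequent first
    """
    return sorted(related_tags.items(),
                  key=lambda item: item[1],
                  reverse=True)

# usage: sort_related_tags({'python': 3, 'twitter': 5, 'api': 1})
#        -> [('twitter', 5), ('python', 3), ('api', 1)]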
def get_content(url):
    """ returns the content from Flickr """
    assert url.startswith("http")
    f = Retrieve(Flickr.__name__).open(url)
    content = f.read()
    f.close()
    return content
def testRetrievalTimeout(self):
    ''' tests whether the socket timeout is honored by our class '''
    SLOW_URL = "http://www.csse.uwa.edu.au/"
    with raises(urllib2.URLError):
        r = Retrieve(self.__class__.__name__,
                     default_timeout=0.1).open(SLOW_URL)
        content = r.read()
        r.close()
def _get_content(url):
    """ returns the content from delicious """
    assert url.startswith("http")
    f = Retrieve(Delicious.__name__).open(url)
    content = f.read()
    f.close()
    # throttle requests to avoid hammering the service
    sleep(1)
    return content
def t_retrieve(url):
    ''' retrieves the given url from the web
    @remarks helper module for the testMultiProcessing unit test.
    '''
    r = Retrieve(__name__).open(url)
    try:
        content = r.read()
    finally:
        # this is required as GzipFile does not support the context
        # protocol in python 2.6
        r.close()
    return content
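# As noted in t_retrieve above, GzipFile lacks the context protocol in
# python 2.6; contextlib.closing is a standard-library way to get
# with-statement cleanup anyway. A minimal sketch, assuming Retrieve
# behaves as in t_retrieve; the function name is hypothetical.
from contextlib import closing

def t_retrieve_with_closing(url):
    ''' same behaviour as t_retrieve, with closing() handling cleanup '''
    with closing(Retrieve(__name__).open(url)) as r:
        return r.read()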
def get_content(url):
    """ returns the content from Technorati """
    assert url.startswith("http")
    logger.debug('Fetching content for URL %s' % url)

    # enforce a minimum delay of SLEEP_TIME seconds between requests
    if (time.time() - Technorati.last_access) < SLEEP_TIME:
        logger.debug('Sleeping %s seconds!' % SLEEP_TIME)
        time.sleep(SLEEP_TIME)
    Technorati.last_access = time.time()

    f = Retrieve("%s_new" % Technorati.__name__).open(url)
    content = f.read()
    f.close()
    return content
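# The two throttling approaches above (a fixed sleep(1) for delicious, a
# last_access timestamp check for Technorati) could be factored into one
# reusable decorator. A minimal sketch; rate_limited is a hypothetical
# name, not part of the original modules.
import time

def rate_limited(min_interval):
    ''' decorator enforcing at least min_interval seconds between calls '''
    def decorator(func):
        last_access = [0.0]  # mutable cell, shared across calls

        def wrapper(*args, **kwargs):
            wait = min_interval - (time.time() - last_access[0])
            if wait > 0:
                time.sleep(wait)
            last_access[0] = time.time()
            return func(*args, **kwargs)
        return wrapper
    return decorator

# usage:
# @rate_limited(SLEEP_TIME)
# def get_content(url): ...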