def get_new_access_token(client_id=FACEBOOK_APPLICATION_ID,
                         client_secret=FACEBOOK_SECRET_KEY,
                         access_token=FACEBOOK_ACCESS_KEY):
    '''
    Exchanges the given Facebook access token for a fresh one.

    @param client_id: the Facebook application id
    @param client_secret: the Facebook application secret
    @param access_token: the access token to renew
    @returns: the new access token, or the old one if it is still valid
    '''
    url = API_URL.format(client_id=client_id, client_secret=client_secret,
                         access_token=access_token)
    retrieve = Retrieve('fb')
    response = retrieve.open(url)
    result = response.read()
    new_access_token = access_token
    # items() instead of the Python2-only iteritems(); single-argument
    # print() calls work under both Python 2 and 3
    for key, param in urlparse.parse_qs(result).items():
        print('%s %s' % (key, param))
        if key == 'access_token':
            # parse_qs wraps values in lists; unwrap the single token
            if isinstance(param, list):
                param = param[0]
            if param == access_token:
                print('access token still the same')
            else:
                print('got new access_token %s' % param)
                new_access_token = param
    return new_access_token
class WikiPedia(object): """ returns an WikiPedia Object """ def __init__(self): self.r = Retrieve(WikiPedia.__name__) def getWikiPage(self, pageName, lang='en'): """ returns the given wikipedia page considering different spellings @param[in] pageName @param[in] language (determines which wikipedia to query) @returns the page's wikipedia text """ assert (len(lang) == 2) for pn in self._getPageNameAlterations(pageName): pageContent = self._retrievePage(pn, lang) if pageContent: return pageContent return None @staticmethod def _getPageNameAlterations(pageName): """ @returns a list of differnt names for the given page """ alt = [ pageName, ] if not ' ' in pageName: alt words = pageName.split(" ") alt.append( "%s %s" % (words[0].capitalize(), " ".join(map(str.lower, words[1:])))) return alt def _retrievePage(self, pageName, lang): """ retrieves the given Wiki page @param[in] pageName @param[in] language (determines which wikipedia to query) @returns the page's wikipedia text """ param = urlencode({ 'action': 'query', 'format': 'json', 'export': '', 'redirects': 'true', 'titles': pageName }) data = self.r.open(WIKIPEDIA_API_QUERY % lang, param).read() jsonData = eval(data)['query'] if '-1' in jsonData['pages']: return None xmlData = jsonData['export']['*'].replace("\/", "/") return parseString(xmlData).getElementsByTagName( 'text')[0].firstChild.data
def testRetrieval(self):
    ''' smoke test: fetch every url in TEST_URLS and read its body '''
    fetcher = Retrieve(self.__class__.__name__)
    for target_url in self.TEST_URLS:
        print(target_url)
        response = fetcher.open(target_url)
        response.read()
        response.close()
class WikiPedia(object): """ returns an WikiPedia Object """ def __init__(self): self.r = Retrieve( WikiPedia.__name__ ) def getWikiPage(self, pageName, lang='en'): """ returns the given wikipedia page considering different spellings @param[in] pageName @param[in] language (determines which wikipedia to query) @returns the page's wikipedia text """ assert( len(lang)==2 ) for pn in self._getPageNameAlterations( pageName ): pageContent = self._retrievePage( pn, lang ) if pageContent: return pageContent return None @staticmethod def _getPageNameAlterations(pageName): """ @returns a list of differnt names for the given page """ alt = [ pageName, ] if not ' ' in pageName: alt words = pageName.split(" ") alt.append( "%s %s" % (words[0].capitalize(), " ".join( map(str.lower, words[1:] ) )) ) return alt def _retrievePage(self, pageName, lang): """ retrieves the given Wiki page @param[in] pageName @param[in] language (determines which wikipedia to query) @returns the page's wikipedia text """ param = urlencode( {'action': 'query', 'format':'json', 'export':'', 'redirects':'true', 'titles':pageName }) data = self.r.open( WIKIPEDIA_API_QUERY % lang, param ).read() jsonData = eval( data )['query'] if '-1' in jsonData['pages']: return None xmlData = jsonData['export']['*'].replace("\/","/") return parseString( xmlData ).getElementsByTagName('text')[0].firstChild.data
class Yahoo(TagInfoService): """ interfaces with yahoo's search service * Search: Yahoo! BOSS (see http://developer.yahoo.com/search/boss) """ __slots__ = ('r', ) def __init__(self): self.r = Retrieve( Yahoo.__name__, sleep_time=0 ) def query(self, terms, count=0, queryParams={} ): """ returns search results for the given terms @param[in] terms ... a list of search terms @param[in] count ... number of results to return (0 if we are interested on the search meta data only). @param[in] queryParams ... a dictionary of query parameters to add to the request @returns the search results """ assert ( isinstance(terms, tuple) or isinstance(terms, list) ) queryParams.update( {'appid': YAHOO_APP_ID, 'count': count, 'format': 'json' } ) params = urlencode( queryParams ) url = YAHOO_SEARCH_URL % "%2B".join(map( quote, terms) ) +"?"+ params print url try: result = eval( self.r.open(url).read().replace("\\/", "/" )) return result['ysearchresponse'] except URLError: return "" @staticmethod def getSearchResults(query_result): """ returns a list of all search results returned by the given query result. @param[in] query_result Result of the query """ return [ YahooSearchResult(r) for r in query_result['resultset_web'] ] \ if 'resultset_web' in query_result else [] def getTagInfo(self, tag): """ @Override """ return int( self.query(tag)['totalhits'] )
class Yahoo(TagInfoService): """ interfaces with yahoo's search service * Search: Yahoo! BOSS (see http://developer.yahoo.com/search/boss) """ __slots__ = ('r', ) def __init__(self): self.r = Retrieve( Yahoo.__name__, sleep_time=0 ) def query(self, terms, count=0, queryParams={} ): """ returns search results for the given terms @param[in] terms ... a list of search terms @param[in] count ... number of results to return (0 if we are interested on the search meta data only). @param[in] queryParams ... a dictionary of query parameters to add to the request @returns the search results """ assert ( isinstance(terms, tuple) or isinstance(terms, list) ) queryParams.update( {'appid': YAHOO_APP_ID, 'count': count, 'format': 'json' } ) params = urlencode( queryParams ) url = YAHOO_SEARCH_URL % "%2B".join(map( quote, terms) ) +"?"+ params print(url) try: result = eval( self.r.open(url).read().replace("\\/", "/" )) return result['ysearchresponse'] except (timeout, URLError): return "" @staticmethod def getSearchResults(query_result): """ returns a list of all search results returned by the given query result. @param[in] query_result Result of the query """ return [ YahooSearchResult(r) for r in query_result['resultset_web'] ] \ if 'resultset_web' in query_result else [] def getTagInfo(self, tag): """ @Override """ return int( self.query(tag)['totalhits'] )
class YahooTermExtractor(object): """ interfaces with yahoo's search service * Term extraction: extract terms from yahoo search http://developer.yahoo.com/search/content/V1/termExtraction.html """ __slots__ = ('r', ) def __init__(self): self.r = Retrieve( YahooTermExtractor.__name__ ) def extractTerms(self, content): """ extract terms from yahoo search, see http://developer.yahoo.com/search/content/V1/termExtraction.html """ params = urlencode( {'appid': YAHOO_APP_ID, 'context': content, 'output': 'json' }) result = eval ( self.r.open(YAHOO_TERM_EXTRACTION_URI, params).read() ) return result['ResultSet']['Result']
def parse(url, last_modified=None): """ Parses the given RSS Feed an returns all articles and the content of the page referenced in the <link> tag. @param url: the url of the rss feed @param last_modified: a datetime object that specifies the last time the feed has been queried the last time (only newer entries are returned). """ feed = feedparser.parse(url, modified=last_modified) retrieve = Retrieve("rss", HTTP_FETCH_DELAY) result = [] for item in feed['items']: if datetime.fromtimestamp( mktime(item['updated_parsed'])) > last_modified: item['content'] = retrieve.open(item['link']).read() result.append(item) return result
def parse(url, last_modified=None): """ Parses the given RSS Feed an returns all articles and the content of the page referenced in the <link> tag. @param url: the url of the rss feed @param last_modified: a datetime object that specifies the last time the feed has been queried the last time (only newer entries are returned). """ feed = feedparser.parse(url, modified=last_modified) retrieve = Retrieve("rss", HTTP_FETCH_DELAY) result = [] for item in feed['items']: if datetime.fromtimestamp(mktime( item['updated_parsed'])) > last_modified: item['content'] = retrieve.open(item['link']).read() result.append(item) return result
class WikiPedia(object): """ returns a wikipedia article """ def __init__(self): self.r = Retrieve( WikiPedia.__name__ ) def getDescriptor(self, synonym, lang='en'): """ returns the descriptor for the given synonym in the diven language """ assert( len(lang)==2 ) try: result = self.getWikipediaSearchResults(synonym, lang) return result[0] except (HTTPError, IndexError): return None def getWikipediaSearchResults(self, term, lang): """ returns a list of wikipedia search results for the given term or None if nothing was found """ search_query = WIKIPEDIA_SEARCH_QUERY % (lang, quote(term) ) f=self.r.open(search_query) results = WikiPedia._parse_wikipedia_search_results( f.read() ) f.close() return results @staticmethod def _parse_wikipedia_search_results( text ): result = [] for line in text.split("\n"): # only consider lines containing search results if not "class='searchresult'" in line: continue (prefix, tmp) = line.split("title=\"", 1) (descriptor, suffix ) = tmp.split("\"", 1) result.append(descriptor) return result
class WikiPedia(object): """ returns a wikipedia article """ def __init__(self): self.r = Retrieve(WikiPedia.__name__) def getDescriptor(self, synonym, lang='en'): """ returns the descriptor for the given synonym in the diven language """ assert(len(lang) == 2) try: result = self.getWikipediaSearchResults(synonym, lang) return result[0] except (HTTPError, IndexError): return None def getWikipediaSearchResults(self, term, lang): """ returns a list of wikipedia search results for the given term or None if nothing was found """ search_query = WIKIPEDIA_SEARCH_QUERY % (lang, quote(term)) f = self.r.open(search_query) results = WikiPedia._parse_wikipedia_search_results(f.read()) f.close() return results @staticmethod def _parse_wikipedia_search_results(text): result = [] for line in text.split("\n"): # only consider lines containing search results if not "class='searchresult'" in line: continue (prefix, tmp) = line.split("title=\"", 1) (descriptor, suffix) = tmp.split("\"", 1) result.append(descriptor) return result
class WebOfTrust(object):
    """ Client for the Web of Trust (WOT) reputation API. """

    def __init__(self, api_key, service_url=SERVICE_URL):
        self.api_key = api_key
        self.service_url = service_url
        self.retrieve = Retrieve('eWRT.ws.wot')

    def get_reputation(self, hosts):
        """ queries the WOT service for the reputation of the given hosts
        :param hosts: a host name/url or a list thereof
        :returns: a dict mapping each host to its formatted reputation
        :raises Exception: if the service returns no result
        """
        query = {'hosts': self._encode_hosts(hosts), 'api_key': self.api_key}
        urlObj = self.retrieve.open(self.service_url % query)
        if not urlObj:
            raise Exception('got no result')
        return self._format_result(json.loads(urlObj.read()))

    @classmethod
    def _encode_hosts(cls, hosts):
        ''' encodes the given hosts in the WOT target format
        >>> WebOfTrust._encode_hosts(['http://wu.ac.at', 'https://wu.ac.at'])
        'wu.ac.at/'
        >>> WebOfTrust._encode_hosts(['wu.ac.at', 'https://modul.ac.at/'])
        'wu.ac.at/modul.ac.at/'
        '''
        if isinstance(hosts, string_types):
            hosts = [hosts]
        selected_hosts = []
        for host in hosts:
            if not host.startswith('http'):
                host = 'http://%s' % host
            netloc = '%s/' % quote(urlparse(host).netloc)
            if netloc not in selected_hosts:
                selected_hosts.append(netloc)
        assert len(hosts) <= MAX_HOSTS, 'too many hosts (max: %s)!' % MAX_HOSTS
        return ''.join(selected_hosts)

    @classmethod
    def _encode_url(cls, service_url, query):
        ''' encodes the url '''
        return service_url % query

    @classmethod
    def _format_result(cls, data):
        ''' Formats the result using MAPPING. The components for the
            reputation provide the reputation and confidence.
            See WOT Developer API for details
        '''
        result = {}
        # fix: items() instead of the Python2-only iteritems() -- the rest
        # of this class (string_types) already targets Python 2/3 compat
        for host, reputation in data.items():
            r = {}
            for attr_name, new_attr_name in MAPPING.items():
                if attr_name in reputation:
                    r[new_attr_name] = reputation[attr_name]
            r['wot_link'] = WOT_LINK % r['target']
            result[host] = r
        return result
class AmazonWS(object): """ This class provides low level amazon web service access """ def __init__(self, location='us', key=None): """ init """ assert (location in AMAZON_LOCATIONS) self.retrieve = Retrieve(self.__class__.__name__) self.wsBase = AMAZON_LOCATIONS[location] self.accessKey = key or AMAZON_ACCESS_KEY self.amazon_url = AmazonUrl() def generateWsUrl(self, arguments): """ generates a valid amazon webservice request url """ argList = ["%s&SubscriptionId=%s" % ( self.wsBase, self.accessKey)] + ["%s=%s" % (k, quote(v)) for k, v in arguments.items()] return "&".join(argList) def generateSignedWsUrl(self, **arguments): """ generates a valid amazon webservice request url """ #argList = [ "%s&SubscriptionId=%s" % (self.wsBase, self.accessKey) ] + [ "%s=%s" % (k,quote(v)) for k,v in arguments.items() ] # return "&".join(argList) return self.amazon_url.get_request_url(arguments) def query(self, arguments): """ retrieves a result from amazon webservice """ url = self.generateWsUrl(arguments) done = False while not done: try: f = self.retrieve.open(url) res = f.read() self._write_debug_data(res) f.close() done = True except ValueError: logging.warning( "Exception webservice query - waiting for %d seconds...\n" % ERROR_SLEEP_TIME) time.sleep(ERROR_SLEEP_TIME) return res @staticmethod def _write_debug_data(data): """ writes the given data to the debug file, if specified """ if not AMAZON_DEBUG_FILE: return d = open(AMAZON_DEBUG_FILE, "a") d.write(data) d.close() def searchItem(self, searchIndex='Books', **param): """ searches an item in the amazon product repository """ arguments = {'Operation': 'ItemSearch', 'SearchIndex': searchIndex, 'BrowseNode': '1000', 'Sort': 'salesrank', 'ResponseGroup': 'SalesRank,Small'} arguments.update(param) return self.query(arguments) def queryReview(self, itemId, **param): """ queries customers reviews to the selected Item """ arguments = {'Operation': 'ItemLookup', 'ResponseGroup': 'Reviews', 'ItemId': itemId} 
arguments.update(param) return self.query(arguments) def newReleases(self, **param): """ returns a list of asins of new releases """ arguments = {'Operation': 'BrowseNodeLookup', 'ResponseGroup': 'NewReleases', 'Marketplace': 'us'} arguments.update(param) return self.query(arguments) def itemAttributes(self, item_id, **param): """ returns all item attribues """ arguments = {'Operation': 'ItemLookup', 'ItemId': item_id, 'IdType': 'ASIN', 'ResponseGroup': 'ItemAttributes,SalesRank'} arguments.update(param) return self.query(arguments)
class WeblyzardDictionaries(object):
    """ Fetches weblyzard dictionary files from a remote server and caches
        them on the local file system. """

    def __init__(self, user, password, local_dir=LOCAL_DIR,
                 server_url=SERVER_URL, max_age_hours=MAX_AGE_HOURS):
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)
        # cutoff timestamp: files modified before this are considered stale
        self.max_file_age = datetime.now() - timedelta(hours=max_age_hours)
        self.local_dir = local_dir
        self.server_url = server_url
        self.retrieve = Retrieve(__file__)
        self.user = user
        self.password = password

    @staticmethod
    def is_online(server_url):
        ''' Checks, whether the given url is online.
        :param server_url: the url to check.
        :returns: True, if the dictionary server is online/reachable.
        '''
        hostname = urlparse.urlsplit(server_url).netloc
        try:
            gethostbyname(hostname)
            return True
        except gaierror:
            return False

    def get_dictionary(self, dictionary_uri):
        ''' tries to load the dictionary from the file-system. If the
        function cannot find the file or if the file is too old (see
        MAX_AGE_HOURS), the function will load the dictionary from
        the server.
        :param dictionary_uri: URI for the dictionary,
               e.g. people/de/titles/all.txt
        :returns: full file name of the dictionary
        '''
        if dictionary_uri.startswith('/'):
            dictionary_uri = dictionary_uri[1:]
        full_path = os.path.join(self.local_dir, dictionary_uri)
        # skip retrieval, if the server is not available
        # (fix: probe the server_url passed to the constructor instead of
        #  always probing the module-level SERVER_URL default)
        if not self.is_online(self.server_url):
            return full_path
        fetch_file = True
        if os.path.isfile(full_path):
            last_mod = datetime.fromtimestamp(os.path.getmtime(full_path))
            if last_mod < self.max_file_age:
                # local copy is stale -> only refetch if the server copy
                # is newer (fix: get_last_mod_date may return None when the
                # response has no Last-Modified header)
                last_mod_server = self.get_last_mod_date(dictionary_uri)
                if last_mod_server is not None and last_mod_server < last_mod:
                    fetch_file = False
            else:
                fetch_file = False
        if fetch_file:
            self.get_from_server(dictionary_uri, full_path)
        return full_path

    def get_last_mod_date(self, dictionary_uri):
        ''' Requests the URL with a HEAD request to retrieve the
        last_modified date of the file
        :param dictionary_uri: URI for the dictionary,
               e.g. people/de/titles/all.txt
        :returns: the server-side modification date, or None if the
                  response carries no Last-Modified header
        '''
        full_url = urlparse.urljoin(self.server_url, dictionary_uri)
        response = self.retrieve.open(full_url, user=self.user,
                                      pwd=self.password, accept_gzip=False,
                                      head_only=True)
        last_modified = response.headers.get('Last-Modified')
        if last_modified:
            return datetime.strptime(last_modified,
                                     '%a, %d %b %Y %H:%M:%S %Z')

    def get_from_server(self, dictionary_uri, target_path):
        ''' Fetches a dictionary from the server and stores it on the
        local FS.
        :param dictionary_uri: URI for the dictionary,
               e.g. people/de/titles/all.txt
        :param target_path: destination on local FS to store the file
        :returns: target_path if the file was saved
        '''
        full_url = urlparse.urljoin(self.server_url, dictionary_uri)
        response = self.retrieve.open(full_url, user=self.user,
                                      pwd=self.password)
        if response:
            target_directory = os.path.dirname(target_path)
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)
            with open(target_path, 'w') as f:
                f.write(response.read())
            return target_path
class GooglePlus(object):
    ''' Client for the Google+ activities API. '''

    def __init__(self, api_key, api_url=API_URL):
        ''' Constructor
        :param api_key: the Google API key used to authenticate
        :param api_url: the API endpoint template
        '''
        # NOTE(review): GooglePlus does not inherit from WebDataSource --
        # confirm that calling its __init__ directly is intended.
        WebDataSource.__init__(self)
        self.api_key = api_key
        self.api_url = api_url
        self.retrieve = Retrieve('google-plus')

    def search(self, search_terms, max_results=DEFAULT_MAX_RESULTS):
        ''' searches Google+ for the given search_terms
        :param search_terms: search terms
        :type search_terms: list
        :param max_results: maximum number of result
        :type max_results: int
        :returns: generator with the result
        '''
        for search_term in search_terms:
            if isinstance(search_term, str):
                search_term = search_term.encode('utf-8')
            params = {'query': '"%s"' % search_term,
                      'orderBy': DEFAULT_ORDER_BY,
                      'maxResults': max_results}
            fetched = self.make_request(params, 'activities')
            for item in fetched['items']:
                try:
                    yield self.convert_item(item)
                except Exception as e:
                    # convert_item signals skipped items via exceptions;
                    # log and continue with the next one
                    logger.info('Error %s occured' % e)
                    continue

    def get_activity(self, activity_id):
        ''' returns the activity with the given ID
        :param activity_id: GooglePlus activity ID
        :type activity_id: string
        :returns: mapped result
        :rtype: dict
        '''
        item = self.make_request(path='activities/%s' % activity_id)
        return self.convert_item(item)

    def make_request(self, params=None, path='activities'):
        ''' executes the request to GooglePlus
        :param params: parameters for the query
        :type params: dict or None
        :param path: path to query, e.g. activities
        :type path: string
        :returns: GooglePlus result
        :rtype: dict
        '''
        url = self.get_request_url(params, path)
        data = self.retrieve.open(url)
        return json.load(data)

    def get_request_url(self, params=None, path='activities'):
        ''' returns a correctly parsed request URL
        :param params: parameters for the query
        :type params: dict or None
        :param path: path to query, e.g. activities
        :type path: string
        :returns: GooglePlus request URL
        :rtype: str

        Usage:
        >>> plus = GooglePlus('abcd')
        >>> plus.get_request_url()
        'https://www.googleapis.com/plus/v1/activities?key=abcd'
        '''
        params = params if params else {}
        if 'key' not in params:
            params['key'] = self.api_key
        # cap the result count at the API's documented maximum
        if 'maxResults' in params and params[
                'maxResults'] > DEFAULT_MAX_RESULTS:
            params['maxResults'] = DEFAULT_MAX_RESULTS
        return self.api_url.format(path=path, query=urlencode(params))

    @classmethod
    def convert_item(cls, item):
        ''' applies a mapping to convert the result to the required format
        :param item: GooglePlus Activity
        :type item: dict
        :rtype: dict
        :raises Exception: for activities that have to be skipped
                           (non-post verbs, empty content, bad attachments)
        '''
        last_modified = datetime.strptime(item['updated'],
                                          '%Y-%m-%dT%H:%M:%S.%fZ')
        # NOTE(review): 'published' is also derived from item['updated'] --
        # confirm whether item['published'] was intended here.
        published = datetime.strptime(item['updated'],
                                      '%Y-%m-%dT%H:%M:%S.%fZ')
        content = cls.convert_content(item['object']['content'])
        if not item['verb'] == 'post':
            raise Exception('Skipping activity of type "%s"' % item['verb'])
        if not len(content):
            logger.info('Skipping "%s" -> content is empty' % item['title'])
            raise Exception('content is empty')
        if 'attachments' in item['object']:
            for attachment in item['object']['attachments']:
                if attachment['objectType'] == 'article':
                    if 'content' not in attachment:
                        raise Exception('no content in attachment')
                    content = '%s\n"%s" (%s)' % (
                        content,
                        cls.convert_content(attachment['content']),
                        attachment['url'])
        activity = {
            'content': content,
            'title': item['actor']['displayName'],
            'url': item['url'],
            'last_modified': last_modified,
            'user_id': item['actor']['id'],
            'user_img_url': item['actor']['image']['url'],
            'screen_name': item['actor']['displayName'],
            'encoding': u'utf-8',
            'user_url': item['actor']['url'],
            'valid_from': published,
            'reshares': item['object']['resharers']['totalItems'],
            'plusoners': item['object']['plusoners']['totalItems'],
            'activity_id': item['id'],
        }
        # fix: the geocode has to be looked up in the *source* item -- the
        # freshly built activity dict can never contain this key, so the
        # original condition was always False
        if 'geocode' in item:
            activity['geocode'] = item['geocode']
        return activity
class AmazonWS(object): """ This class provides low level amazon web service access """ def __init__(self, location='us', key=None): """ init """ assert (location in AMAZON_LOCATIONS) self.retrieve = Retrieve(self.__class__.__name__) self.wsBase = AMAZON_LOCATIONS[location] self.accessKey = key or AMAZON_ACCESS_KEY self.amazon_url = AmazonUrl() def generateWsUrl(self, arguments): """ generates a valid amazon webservice request url """ argList = ["%s&SubscriptionId=%s" % (self.wsBase, self.accessKey)] + [ "%s=%s" % (k, quote(v)) for k, v in list(arguments.items()) ] return "&".join(argList) def generateSignedWsUrl(self, **arguments): """ generates a valid amazon webservice request url """ #argList = [ "%s&SubscriptionId=%s" % (self.wsBase, self.accessKey) ] + [ "%s=%s" % (k,quote(v)) for k,v in arguments.items() ] # return "&".join(argList) return self.amazon_url.get_request_url(arguments) def query(self, arguments): """ retrieves a result from amazon webservice """ url = self.generateWsUrl(arguments) done = False while not done: try: f = self.retrieve.open(url) res = f.read() self._write_debug_data(res) f.close() done = True except ValueError: logging.warning( "Exception webservice query - waiting for %d seconds...\n" % ERROR_SLEEP_TIME) time.sleep(ERROR_SLEEP_TIME) return res @staticmethod def _write_debug_data(data): """ writes the given data to the debug file, if specified """ if not AMAZON_DEBUG_FILE: return d = open(AMAZON_DEBUG_FILE, "a") d.write(data) d.close() def searchItem(self, searchIndex='Books', **param): """ searches an item in the amazon product repository """ arguments = { 'Operation': 'ItemSearch', 'SearchIndex': searchIndex, 'BrowseNode': '1000', 'Sort': 'salesrank', 'ResponseGroup': 'SalesRank,Small' } arguments.update(param) return self.query(arguments) def queryReview(self, itemId, **param): """ queries customers reviews to the selected Item """ arguments = { 'Operation': 'ItemLookup', 'ResponseGroup': 'Reviews', 'ItemId': itemId } 
arguments.update(param) return self.query(arguments) def newReleases(self, **param): """ returns a list of asins of new releases """ arguments = { 'Operation': 'BrowseNodeLookup', 'ResponseGroup': 'NewReleases', 'Marketplace': 'us' } arguments.update(param) return self.query(arguments) def itemAttributes(self, item_id, **param): """ returns all item attribues """ arguments = { 'Operation': 'ItemLookup', 'ItemId': item_id, 'IdType': 'ASIN', 'ResponseGroup': 'ItemAttributes,SalesRank' } arguments.update(param) return self.query(arguments)
class WebOfTrust(object):
    """ Client for the Web of Trust (WOT) reputation API. """

    def __init__(self, api_key, service_url=SERVICE_URL):
        self.api_key = api_key
        self.service_url = service_url
        self.retrieve = Retrieve('eWRT.ws.wot')

    def get_reputation(self, hosts):
        """ queries the WOT service for the reputation of the given hosts
        :param hosts: a host name/url or a list thereof
        :returns: a dict mapping each host to its formatted reputation
        :raises Exception: if the service returns no result
        """
        query = {'hosts': self._encode_hosts(hosts), 'api_key': self.api_key}
        urlObj = self.retrieve.open(self.service_url % query)
        if not urlObj:
            raise Exception('got no result')
        return self._format_result(json.loads(urlObj.read()))

    @classmethod
    def _encode_hosts(cls, hosts):
        ''' encodes the given hosts in the WOT target format
        >>> WebOfTrust._encode_hosts(['http://wu.ac.at', 'https://wu.ac.at'])
        'wu.ac.at/'
        >>> WebOfTrust._encode_hosts(['wu.ac.at', 'https://modul.ac.at/'])
        'wu.ac.at/modul.ac.at/'
        '''
        if isinstance(hosts, string_types):
            hosts = [hosts]
        selected_hosts = []
        for host in hosts:
            if not host.startswith('http'):
                host = 'http://%s' % host
            netloc = '%s/' % quote(urlparse(host).netloc)
            if netloc not in selected_hosts:
                selected_hosts.append(netloc)
        assert len(hosts) <= MAX_HOSTS, 'too many hosts (max: %s)!' % MAX_HOSTS
        return ''.join(selected_hosts)

    @classmethod
    def _encode_url(cls, service_url, query):
        ''' encodes the url '''
        return service_url % query

    @classmethod
    def _format_result(cls, data):
        ''' Formats the result using MAPPING. The components for the
            reputation provide the reputation and confidence.
            See WOT Developer API for details
        '''
        result = {}
        # fix: items() instead of the Python2-only iteritems() -- the rest
        # of this class (string_types) already targets Python 2/3 compat
        for host, reputation in data.items():
            r = {}
            for attr_name, new_attr_name in MAPPING.items():
                if attr_name in reputation:
                    r[new_attr_name] = reputation[attr_name]
            r['wot_link'] = WOT_LINK % r['target']
            result[host] = r
        return result
class GooglePlus(object):
    ''' Client for the Google+ activities API. '''

    def __init__(self, api_key, api_url=API_URL):
        ''' Constructor
        :param api_key: the Google API key used to authenticate
        :param api_url: the API endpoint template
        '''
        # NOTE(review): GooglePlus does not inherit from WebDataSource --
        # confirm that calling its __init__ directly is intended.
        WebDataSource.__init__(self)
        self.api_key = api_key
        self.api_url = api_url
        self.retrieve = Retrieve('google-plus')

    def search(self, search_terms, max_results=DEFAULT_MAX_RESULTS):
        ''' searches Google+ for the given search_terms
        :param search_terms: search terms
        :type search_terms: list
        :param max_results: maximum number of result
        :type max_results: int
        :returns: generator with the result
        '''
        for search_term in search_terms:
            if isinstance(search_term, unicode):
                search_term = search_term.encode('utf-8')
            params = {'query': '"%s"' % search_term,
                      'orderBy': DEFAULT_ORDER_BY,
                      'maxResults': max_results}
            fetched = self.make_request(params, 'activities')
            for item in fetched['items']:
                try:
                    yield self.convert_item(item)
                except Exception as e:
                    # convert_item signals skipped items via exceptions;
                    # log and continue with the next one
                    logger.info('Error %s occured' % e)
                    continue

    def get_activity(self, activity_id):
        ''' returns the activity with the given ID
        :param activity_id: GooglePlus activity ID
        :type activity_id: string
        :returns: mapped result
        :rtype: dict
        '''
        item = self.make_request(path='activities/%s' % activity_id)
        return self.convert_item(item)

    def make_request(self, params=None, path='activities'):
        ''' executes the request to GooglePlus
        :param params: parameters for the query
        :type params: dict or None
        :param path: path to query, e.g. activities
        :type path: string
        :returns: GooglePlus result
        :rtype: dict
        '''
        url = self.get_request_url(params, path)
        data = self.retrieve.open(url)
        return json.load(data)

    def get_request_url(self, params=None, path='activities'):
        ''' returns a correctly parsed request URL
        :param params: parameters for the query
        :type params: dict or None
        :param path: path to query, e.g. activities
        :type path: string
        :returns: GooglePlus request URL
        :rtype: str

        Usage:
        >>> plus = GooglePlus('abcd')
        >>> plus.get_request_url()
        'https://www.googleapis.com/plus/v1/activities?key=abcd'
        '''
        params = params if params else {}
        if 'key' not in params:
            params['key'] = self.api_key
        # cap the result count at the API's documented maximum
        if 'maxResults' in params and \
                params['maxResults'] > DEFAULT_MAX_RESULTS:
            params['maxResults'] = DEFAULT_MAX_RESULTS
        return self.api_url.format(path=path, query=urlencode(params))

    @classmethod
    def convert_item(cls, item):
        ''' applies a mapping to convert the result to the required format
        :param item: GooglePlus Activity
        :type item: dict
        :rtype: dict
        :raises Exception: for activities that have to be skipped
                           (non-post verbs, empty content, bad attachments)
        '''
        last_modified = datetime.strptime(item['updated'],
                                          '%Y-%m-%dT%H:%M:%S.%fZ')
        # NOTE(review): 'published' is also derived from item['updated'] --
        # confirm whether item['published'] was intended here.
        published = datetime.strptime(item['updated'],
                                      '%Y-%m-%dT%H:%M:%S.%fZ')
        content = cls.convert_content(item['object']['content'])
        if not item['verb'] == 'post':
            raise Exception('Skipping activity of type "%s"' % item['verb'])
        if not len(content):
            logger.info('Skipping "%s" -> content is empty' % item['title'])
            raise Exception('content is empty')
        if 'attachments' in item['object']:
            for attachment in item['object']['attachments']:
                if attachment['objectType'] == 'article':
                    if 'content' not in attachment:
                        raise Exception('no content in attachment')
                    content = '%s\n"%s" (%s)' % (
                        content,
                        cls.convert_content(attachment['content']),
                        attachment['url'])
        activity = {'content': content,
                    'title': item['actor']['displayName'],
                    'url': item['url'],
                    'last_modified': last_modified,
                    'user_id': item['actor']['id'],
                    'user_img_url': item['actor']['image']['url'],
                    'screen_name': item['actor']['displayName'],
                    'encoding': u'utf-8',
                    'user_url': item['actor']['url'],
                    'valid_from': published,
                    'reshares': item['object']['resharers']['totalItems'],
                    'plusoners': item['object']['plusoners']['totalItems'],
                    'activity_id': item['id'],
                    }
        # fix: the geocode has to be looked up in the *source* item -- the
        # freshly built activity dict can never contain this key, so the
        # original condition was always False
        if 'geocode' in item:
            activity['geocode'] = item['geocode']
        return activity