def currentDataOfCity(cityName, useJSON=True, useMemcache=True):
    """
    Return the current weather information of a city scraped from its page.

    cityName    -- city identifier; compared against the second element of
                   each ``cityList`` entry and used as key into ``dataDict``
                   and as the memcache key.
    useJSON     -- when true the result is returned as a JSON string,
                   otherwise as a dict.
    useMemcache -- when true a cached result is returned if one exists.

    The result holds the keys "city", "description", "image" and
    "temperature". Failures are reported through ``errorMsg``; a freshly
    scraped result is always written to memcache.
    """
    def render(payload):
        # Serialize only when the caller asked for JSON.
        if useJSON:
            return json.dumps(payload)
        return payload

    memcacheKey = cityName
    if useMemcache:
        cached = memcache.get(memcacheKey, namespace=memcacheNamespace) #@UndefinedVariable
        if cached is not None:
            return render(cached)

    # Validate cityName; when several entries match, the last one wins.
    matchedNames = [entry[0] for entry in cityList if cityName==entry[1]]
    if not matchedNames:
        return errorMsg(201, "City is not found.")
    chtCityName = matchedNames[-1]

    pageURL = dataDict[cityName][0]
    tablePosition = dataDict[cityName][1]

    # Download the page that holds the data of this city
    try:
        fetchResult = urlfetch.fetch(pageURL)
    except DownloadError:
        return errorMsg(101, "Fetching city list is timeout!")

    # Anything but a 200 response counts as a failure
    if fetchResult is None or fetchResult.status_code!=200:
        return errorMsg(300, "Fetching current data is failed!")

    # Parse the page and keep only the data tables of the "Current_info" div
    soup = BeautifulSoup(fetchResult.content)
    soup.head.extract()
    infoDiv = soup.html.find("div", attrs={'class':'Current_info'})
    currentInfos = infoDiv.findAll("table", attrs={"class":"datatable"})
    soup.extract()

    # Row 2 carries the weather icon (alt text + image path), row 3 the
    # temperature text whose last six characters are dropped.
    rows = currentInfos[tablePosition].findAll("tr")
    weatherIcon = rows[2].td.img
    resultDict = {
               "city": chtCityName,
               "description": unicode(weatherIcon["alt"].strip()),
               "image": unicode("http://www.cwb.gov.tw"+weatherIcon["src"].strip()),
               "temperature": unicode(rows[3].td.contents[0][:-6].strip()),
               }

    # Cache the fresh result, then hand it back in the requested form
    memcache.set(memcacheKey, resultDict, 4200, namespace=memcacheNamespace) #@UndefinedVariable
    return render(resultDict)
def getLyrics(singer, song):
    """
    Fetch the lyrics of *song* by *singer* from lyrics.wikia.com.

    Only the first of several '/'-separated singers is used, and spaces in
    both names are replaced by underscores to build the page address. If the
    first page carries a redirect list or a "Did you mean ...?" headline,
    the referenced page is fetched instead.

    Returns the lyrics as text with a newline for every ``<br/>``, or the
    placeholder ``'######'`` when no ``lyricbox`` element is found.
    """
    # Replace spaces with _ and keep only the first singer
    singer = re.split('/', singer)[0]
    singer = singer.replace(' ', '_')
    song = song.replace(' ', '_')
    r = requests.get('http://lyrics.wikia.com/{0}:{1}'.format(singer, song))
    page = BeautifulSoup(r.text)

    # Both hints are read from the first page, before any follow-up request.
    redirect = page.find("ul", {'class': 'redirectText'})
    suggest = page.find("span", {'class': 'mw-headline'})
    if redirect is not None:
        target = redirect.text.replace(' ', '_')
        r = requests.get('http://lyrics.wikia.com/' + target)
        page = BeautifulSoup(r.text)
    if suggest is not None:
        headline = suggest.text
        if 'Did you mean' in headline:
            # Keep what stands between 'Did you mean' and the '?'
            target = headline.split('Did you mean')[1].split('?')[0].replace(
                ' ', '_')
            r = requests.get('http://lyrics.wikia.com/' + target)
            page = BeautifulSoup(r.text)

    # Get main lyrics holder
    lyrics = page.find("div", {'class': 'lyricbox'})
    if lyrics is None:
        return '######'

    # Remove scripts (plain loops: nothing is collected, and no outer name
    # is rebound by the loop variable)
    for script in lyrics('script'):
        script.extract()

    # Remove comments
    for comment in lyrics.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    # Remove unnecessary tags but keep their children
    for tag in ['div', 'i', 'b', 'a']:
        for match in lyrics.findAll(tag):
            match.replaceWithChildren()

    # Take the markup *inside* the lyricbox element. This drops the wrapping
    # tag whatever attributes it carries, instead of cutting a fixed number
    # of characters from both ends of the rendered element.
    inner_html = lyrics.decode_contents()
    # Round-trip through UTF-8 so unencodable characters are replaced.
    inner_html = inner_html.encode('utf-8', errors='replace').decode('utf-8')
    # Source newlines carry no meaning; <br/> marks the real line breaks.
    return inner_html.replace('\n', '').replace('<br/>', '\n')
Beispiel #3
0
class Client(object):
    """
    Client for the Net print web service reachable at ``Client.url``.

    ``login`` stores the session key and the parsed file table of the main
    page; ``list`` reads that table, ``send`` uploads a file and ``delete``
    removes files. ``reload`` fetches the main page again.
    """

    url = 'https://www.printing.ne.jp/cgi-bin/mn.cgi'

    def __init__(self, http_obj=None, user_agent=None):
        """
        Create a client.

        http_obj   -- object whose ``request`` method performs the HTTP
                      calls; when omitted an ``httplib2.Http`` using a
                      ``DictCache`` is created.
        user_agent -- optional value sent as ``User-Agent`` header.
        """
        if http_obj is None:
            cache = DictCache()
            http_obj = httplib2.Http(cache=cache)
        self.http_obj = http_obj
        self.user_agent = user_agent
        self._soup = None
        self._encoding = None

    def _request(self, uri, method='GET', headers=None, body=None,
            status=(200, 304), **kwargs):
        """
        Request on HTTP.

        Assume that using httplib2.Http, so even status is 304 by response,
        content must exist.

        A dict ``body`` is sent as POST (unless the method is already POST
        or PUT), multipart encoded when ``is_multipart`` says so and form
        encoded otherwise. Returns ``(response, content)``.

        Raises AssertionError when the response status is not in ``status``.
        """
        uri = str(uri)
        if headers is not None:
            # Work on a copy so the caller's dict is never modified.
            headers = headers.copy()
        else:
            headers = {}
        if self.user_agent is not None:
            headers['User-Agent'] = self.user_agent
        if isinstance(body, dict):
            if method not in ('POST', 'PUT'):
                method = 'POST'
            if is_multipart(body):
                body, boundary = encode_multipart_data(body)
                headers.update(MULTIPART_HEADERS)
                headers['Content-Type'] = MULTIPART_HEADERS['Content-Type'] + \
                                          boundary
            else:
                body = urlencode(body, True)
                headers.update(FORMENCODE_HEADERS)
        (response, content) = self.http_obj.request(uri,
                method=method, headers=headers, body=body, **kwargs)
        if response.status not in status:
            # Raised explicitly instead of through ``assert`` so the check
            # is still performed when Python runs with -O.
            raise AssertionError("%s %s" % (response.status, response.reason))
        return (response, content)

    def ensure_encoding(self, s):
        """
        Return ``s`` encoded in the encoding of the last parsed page.

        A byte string is first decoded as UTF-8; unicode is then encoded
        with ``self._encoding``, replacing characters that cannot be
        represented. Other values are returned unchanged.

        Raises AssertionError when no page encoding is known yet.
        """
        if isinstance(s, str):
            # to unicode
            s = s.decode('utf-8')
        if isinstance(s, unicode):
            # to netprint encoding
            if self._encoding is None:
                # Explicit raise: must not be stripped under -O.
                raise AssertionError("page encoding is not known yet")
            s = s.encode(self._encoding, 'replace')
        return s

    def login(self, username, password):
        """
        Login to the Net print service.

        On success the session key, the parsed page and its encoding are
        stored on the client and the page is trimmed to the file table.

        Raises LoginFailure when the request fails or the response carries
        no usable session key.
        """
        try:
            (_, content) = self._request(self.url,
                    method='POST',
                    body={'i': username, 'p': password})

            soup = BeautifulSoup(content)
            session_field = soup.find('input', attrs={'name': 's'})
            # Explicit checks instead of ``assert`` so that a missing or
            # empty session key is rejected even under -O.
            if session_field is None:
                raise ValueError("session field is missing")

            self.session_key = session_field['value']
            if not self.session_key:
                raise ValueError("session key is empty")
        except Exception:
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not reported as a failed login.
            raise LoginFailure("username or password is wrong.")
        self._soup = soup  # update soup.
        self._encoding = self._soup.originalEncoding
        self._check_displaying_main_page_then_trim()

    def go_home(self):
        """
        Fetch the main page of the current session and store its soup and
        encoding.
        """
        (_, content) = self._request(
                self.url + '?s=' + self.session_key)
        self._soup = BeautifulSoup(content)  # update soup.
        self._encoding = self._soup.originalEncoding

    def reload(self):
        """
        Fetch the main page again and trim it to the file table.
        """
        self.go_home()
        self._check_displaying_main_page_then_trim()

    def _check_displaying_main_page_then_trim(self):
        """
        Verify that the stored soup shows the main page, then replace it by
        the table that holds the file list.

        Raises ValueError when no soup is stored and UnexpectedContent when
        the page does not contain exactly one header row matching
        ``header_row``.
        """
        if self._soup is None:
            raise ValueError("need soup")

        # u"ファイル名" ("file name") is the header cell used to locate the
        # table of uploaded files.
        ns_list = self._soup.findAll(text=u"ファイル名")
        if len(ns_list) != 1:
            raise UnexpectedContent

        ns = ns_list[0]
        # Compare every non-blank text of the header row with the expected one.
        header_texts = ns.findParent('tr')\
                .findAll(text=lambda text: len(text.strip()) > 0)
        if header_texts != header_row:
            raise UnexpectedContent

        # trim
        self._soup = ns.findParent('table')
        self._soup.extract()

    def list(self, retry=0):
        """
        Return the files shown in the stored table as a list of Item.

        When a row looks incomplete the page is reloaded after one second
        and the listing is tried again, up to MAX_RETRY times.

        retry -- number of retries already performed.

        Raises Reload when a row is still incomplete after the last retry.
        """
        try:
            item_list = []
            # The first row is the header.
            for row in self._soup.findAll('tr')[1:]:
                id_field = row.find('input', attrs={'name': 'fc'})
                if id_field is None:
                    raise Reload
                item_id = id_field['value']
                if item_id is None:
                    raise Reload

                column_list = list(row)
                id_column = column_list[2]

                # u"エラー" ("error") marks a file that was not accepted.
                error_row = id_column.find(text=u"エラー") is not None
                if not error_row and id_column.string is None:
                    raise Reload

                name = unicode(column_list[1].string)
                file_size = unicode(column_list[3].string)
                if error_row:
                    # Error rows get placeholder page information.
                    page_size = ''
                    page_numbers = 0
                else:
                    page_size = unicode(column_list[4].string)
                    page_numbers = int(column_list[5].string)
                valid_date = unicode(column_list[6].string)
                item_list.append(Item(item_id,
                                      name,
                                      file_size,
                                      page_size,
                                      page_numbers,
                                      valid_date,
                                      error_row,
                                     ))
            return item_list
        except Reload:
            if retry < MAX_RETRY:
                time.sleep(1)
                self.reload()
                return self.list(retry=retry + 1)
            else:
                raise

    def delete(self, *item_or_id):
        """
        delete a file on Netprint.

        Each argument is either an Item, whose ``id`` is used, or an id.
        """

        id_set = set()
        for i in item_or_id:
            if isinstance(i, Item):
                id_set.add(i.id)
            else:
                id_set.add(i)

        # The response is not inspected; _request already checks the status.
        self._request(self.url, body={
            'c': 0,  # unknown
            's': self.session_key,
            'fc': id_set,
            'delexec.x': 1,
            'delexec.y': 1})

    def send(self, path_or_file,
             file_name=None,
             paper_size=PaperSize.A4,
             color=Color.choice_at_printing,
             reserversion_number=ReservationNumber.AlphaNum,
             need_secret=NeedSecret.No,
             secret_code=None,
             need_margin=NeedMargin.No,
             need_notification=NeedNotification.No,
             mail_address=None):
        """
        send a file to Netprint.

        path_or_file -- path of the file to upload, or a file-like object
                        (which is read but left open for the caller).
        file_name    -- name to upload under; defaults to the ``name`` of
                        the file. Only the last path component is used.
        secret_code  -- required when need_secret is NeedSecret.Yes.
        mail_address -- required when need_notification is
                        NeedNotification.Yes.

        The remaining options are passed to the service as given.

        Raises ValueError for an unsupported ``path_or_file`` or an
        inconsistent combination of options.
        """

        if isinstance(path_or_file, basestring):
            # Binary mode keeps the uploaded bytes exact on every platform.
            f = open(path_or_file, 'rb')
            owns_file = True
        elif is_file_like(path_or_file):
            f = path_or_file
            owns_file = False
        else:
            raise ValueError("unknown value of path_or_file")

        try:
            if file_name:
                name = file_name
            else:
                name = f.name
            name = self.ensure_encoding(os.path.split(name)[-1])
            data = f.read()
        finally:
            # Close only a handle opened here; a file-like object that was
            # passed in stays under the caller's control.
            if owns_file:
                f.close()

        # wrap to set the name.
        f = StringIO(data)
        f.name = name

        if paper_size == PaperSize.L and color != Color.color:
            raise ValueError("L size printing only accept color")
        if need_secret == NeedSecret.Yes and secret_code is None:
            raise ValueError("need secret_code")

        if need_notification == NeedNotification.Yes and mail_address is None:
            raise ValueError("need mail_address")

        sending_url = get_sending_target(f.name)
        self._request(sending_url, body=dict(
            s=self.session_key,
            c=0,  # unknown
            m=2,  # unknown
            re=1,  # unknown
            file1=f,
            papersize=paper_size,
            color=color,
            number=reserversion_number,
            secretcodesw=need_secret,
            secretcode=secret_code or '',
            duplextype=9,  # unknown
            magnification=need_margin,
            mailsw=need_notification,
            mailaddr=mail_address or ''))