def currentDataOfCity(cityName, useJSON=True, useMemcache=True):
    """Return the current weather record of ``cityName``.

    The record is a dict with the keys ``city``, ``description``, ``image``
    and ``temperature``, scraped from the page registered for the city in
    ``dataDict``.  A freshly scraped record is always written to memcache;
    the cache is only *read* when ``useMemcache`` is true.

    cityName    -- compared against the second field of every ``cityList``
                   entry; also the ``dataDict`` key and the memcache key.
    useJSON     -- true: return the record as a JSON string;
                   false: return the dict itself.
    useMemcache -- true: return a cached record when one exists.

    Failures are returned (not raised) as whatever ``errorMsg`` produces:
    201 when the city is not in ``cityList``, 101 when the fetch raises
    ``DownloadError``, 300 when the response status is not 200.
    """
    def _emit(record):
        # Serialise only when the caller asked for JSON.
        return json.dumps(record) if useJSON else record

    cacheKey = cityName

    # Serve from cache when allowed and available.
    if useMemcache:
        cached = memcache.get(cacheKey, namespace=memcacheNamespace)  #@UndefinedVariable
        if cached is not None:
            return _emit(cached)

    # Validate the city name; when several entries share the same key the
    # last one supplies the display name.
    displayNames = [entry[0] for entry in cityList if cityName == entry[1]]
    if not displayNames:
        return errorMsg(201, "City is not found.")
    chtCityName = displayNames[-1]

    pageURL = dataDict[cityName][0]
    tablePosition = dataDict[cityName][1]

    # Download the page.
    try:
        response = urlfetch.fetch(pageURL)
    except DownloadError:
        return errorMsg(101, "Fetching city list is timeout!")
    if response is None or response.status_code != 200:
        return errorMsg(300, "Fetching current data is failed!")

    # Parse the page and keep only the data tables of the current-info box.
    soup = BeautifulSoup(response.content)
    soup.head.extract()
    infoBox = soup.html.find("div", attrs={'class': 'Current_info'})
    currentInfos = infoBox.findAll("table", attrs={"class": "datatable"})
    soup.extract()

    # Third row carries the weather icon, fourth row the temperature text.
    rows = currentInfos[tablePosition].findAll("tr")
    icon = rows[2].td.img
    description = unicode(icon["alt"].strip())
    image = unicode("http://www.cwb.gov.tw" + icon["src"].strip())
    # The last six characters of the cell text are dropped
    # (presumably a unit suffix -- confirm against the page markup).
    temperature = unicode(rows[3].td.contents[0][:-6].strip())

    resultDict = {
        "city": chtCityName,
        "description": description,
        "image": image,
        "temperature": temperature,
    }

    # Cache the fresh record, then hand it back in the requested form.
    memcache.set(cacheKey, resultDict, 4200, namespace=memcacheNamespace)  #@UndefinedVariable
    return _emit(resultDict)
def getLyrics(singer, song):
    """Fetch the lyrics of ``song`` by ``singer`` from lyrics.wikia.com.

    singer -- artist name; when several artists are given separated by
              '/', only the first one is used.
    song   -- song title.

    Spaces in both names are replaced by underscores to build the page
    path ``<singer>:<song>``.  If the first page carries a redirect notice
    or a "Did you mean ...?" headline, the indicated page is fetched
    instead (the suggestion is read from the first page and, when present,
    takes precedence over the redirect).

    Returns the lyrics as text with one line per ``<br/>``, or the
    sentinel string '######' when the final page has no ``lyricbox`` div.
    """
    def fetch_soup(path):
        # Download one wiki page and parse it.
        response = requests.get('http://lyrics.wikia.com/' + path)
        return BeautifulSoup(response.text)

    # Delete multiple singers, only keep the first one
    singer = singer.split('/')[0]
    # Replace spaces with _
    singer = singer.replace(' ', '_')
    song = song.replace(' ', '_')

    s = fetch_soup('{0}:{1}'.format(singer, song))

    # Both markers are looked up on the page that was requested first.
    redirect = s.find("ul", {'class': 'redirectText'})
    suggest = s.find("span", {'class': 'mw-headline'})

    if redirect is not None:
        s = fetch_soup(redirect.text.replace(' ', '_'))

    if suggest is not None:
        headline = suggest.text
        # Only a "Did you mean X?" headline is a suggestion; any other
        # headline is ordinary page content and must not be followed.
        if 'Did you mean' in headline:
            # Keep what is between 'Did you mean' and the first '?'.
            target = headline.split('Did you mean')[1].split('?')[0]
            s = fetch_soup(target.replace(' ', '_'))

    # Get main lyrics holder
    lyrics = s.find("div", {'class': 'lyricbox'})
    if lyrics is None:
        return '######'

    # Remove scripts
    for script in lyrics('script'):
        script.extract()

    # Remove comments
    for comment in lyrics.findAll(text=lambda text: isinstance(text, Comment)):
        comment.extract()

    # Remove unnecessary tags but keep their contents
    for tag in ['div', 'i', 'b', 'a']:
        for match in lyrics.findAll(tag):
            match.replaceWithChildren()

    # Serialise the holder; the encode/decode round trip replaces
    # characters that cannot be encoded as UTF-8.  The slice strips the
    # 22-character opening '<div class="lyricbox">' and the 6-character
    # closing '</div>'.
    markup = str(lyrics).encode('utf-8', errors='replace')
    inner = markup[22:-6].decode("utf-8")

    # Source newlines carry no meaning; <br/> marks the real line breaks.
    return inner.replace('\n', '').replace('<br/>', '\n')
class Client(object):
    """Client for the Net print web service reachable at ``url``.

    The client keeps the most recently fetched page as a parsed soup in
    ``_soup`` and the encoding detected for that page in ``_encoding``.
    ``login`` must succeed before any method that needs ``session_key``.
    """

    url = 'https://www.printing.ne.jp/cgi-bin/mn.cgi'

    def __init__(self, http_obj=None, user_agent=None):
        """
        http_obj   -- object with an httplib2.Http-compatible ``request``
                      method; a caching ``httplib2.Http`` is created when
                      omitted.
        user_agent -- value for the User-Agent header, or None to send
                      no explicit one.
        """
        if http_obj is None:
            cache = DictCache()
            http_obj = httplib2.Http(cache=cache)
        self.http_obj = http_obj
        self.user_agent = user_agent
        self._soup = None
        self._encoding = None

    def _request(self, uri, method='GET', headers=None, body=None,
                 status=(200, 304), **kwargs):
        """
        Request on HTTP.

        Assume that using httplib2.Http, so even status is 304 by response,
        content must exist.

        A dict ``body`` is encoded as multipart or as a urlencoded form
        (as decided by ``is_multipart``) and forces the method to POST
        unless it already is POST or PUT.  ``headers`` is copied, never
        modified in place.

        Returns the ``(response, content)`` pair.
        Raises AssertionError when the response status is not in ``status``.
        """
        uri = str(uri)
        if headers is not None:
            headers = headers.copy()
        else:
            headers = {}
        if self.user_agent is not None:
            headers['User-Agent'] = self.user_agent
        if isinstance(body, dict):
            if method not in ('POST', 'PUT'):
                method = 'POST'
            if is_multipart(body):
                body, boundary = encode_multipart_data(body)
                headers.update(MULTIPART_HEADERS)
                headers['Content-Type'] = \
                    MULTIPART_HEADERS['Content-Type'] + boundary
            else:
                body = urlencode(body, True)
                headers.update(FORMENCODE_HEADERS)
        (response, content) = self.http_obj.request(
            uri, method=method, headers=headers, body=body, **kwargs)
        # Raised explicitly (rather than via ``assert``) so the check is
        # not stripped under -O; the exception type is kept for callers.
        if response.status not in status:
            raise AssertionError(
                "%s %s" % (response.status, response.reason))
        return (response, content)

    def ensure_encoding(self, s):
        """Return ``s`` encoded in the encoding of the current page.

        A byte string is taken to be UTF-8 and decoded first; unicode
        text is encoded with ``_encoding``, replacing characters that
        cannot be represented.  Other values are returned unchanged.

        Raises AssertionError when text must be encoded but no page
        encoding is known yet.
        """
        if isinstance(s, str):
            # to unicode
            s = s.decode('utf-8')
        if isinstance(s, unicode):
            # to netprint encoding
            if self._encoding is None:
                raise AssertionError("page encoding is not known yet")
            s = s.encode(self._encoding, 'replace')
        return s

    def login(self, username, password):
        """
        Login to the Net print service.

        On success stores ``session_key`` and the main page, trimmed to
        the file table.  Any failure while requesting or parsing the
        login response is reported as LoginFailure.
        """
        failure_message = "username or password is wrong."
        try:
            (_, content) = self._request(self.url, method='POST',
                                         body={'i': username,
                                               'p': password})
            soup = BeautifulSoup(content)
            session_field = soup.find('input', attrs={'name': 's'})
            if not session_field:
                raise LoginFailure(failure_message)
            self.session_key = session_field['value']
            if not self.session_key:
                raise LoginFailure(failure_message)
        except LoginFailure:
            raise
        except Exception:
            # Deliberately broad, but no longer swallows
            # KeyboardInterrupt / SystemExit.
            raise LoginFailure(failure_message)

        self._soup = soup  # update soup.
        self._encoding = self._soup.originalEncoding
        self._check_displaying_main_page_then_trim()

    def go_home(self):
        """Fetch the page for the current session and store it untrimmed."""
        (_, content) = self._request(
            self.url + '?s=' + self.session_key)
        self._soup = BeautifulSoup(content)  # update soup.
        self._encoding = self._soup.originalEncoding

    def reload(self):
        """Re-fetch the main page and trim it to the file table."""
        self.go_home()
        self._check_displaying_main_page_then_trim()

    def _check_displaying_main_page_then_trim(self):
        """Verify ``_soup`` is the main page, then keep only its file table.

        Raises ValueError when no page has been loaded and
        UnexpectedContent when the expected header cell or header row is
        not found.
        """
        if self._soup is None:
            raise ValueError("need soup")

        # The header cell text must occur exactly once on the page.
        ns_list = self._soup.findAll(text=u"ファイル名")
        if len(ns_list) != 1:
            raise UnexpectedContent

        ns = ns_list[0]
        # Non-blank texts of the header row must equal ``header_row``.
        header_texts = ns.findParent('tr').findAll(
            text=lambda text: len(text.strip()) > 0)
        if header_texts != header_row:
            raise UnexpectedContent

        # trim
        self._soup = ns.findParent('table')
        self._soup.extract()

    def list(self, retry=0):
        """Return the files shown in the current table as ``Item`` objects.

        A row that is not complete yet raises Reload internally; the page
        is then reloaded after one second and parsing is retried, at most
        ``MAX_RETRY`` times, after which Reload propagates to the caller.

        retry -- number of retries already performed (internal counter).
        """
        try:
            item_list = []
            # The first row is the header.
            for row in self._soup.findAll('tr')[1:]:
                id_field = row.find('input', attrs={'name': 'fc'})
                if id_field is None:
                    raise Reload
                item_id = id_field['value']
                if item_id is None:
                    raise Reload

                column_list = list(row)
                id_column = column_list[2]
                error_row = id_column.find(text=u"エラー") is not None
                if not error_row and id_column.string is None:
                    raise Reload

                name = unicode(column_list[1].string)
                file_size = unicode(column_list[3].string)
                if error_row:
                    # Error rows get placeholder page information.
                    page_size = ''
                    page_numbers = 0
                else:
                    page_size = unicode(column_list[4].string)
                    page_numbers = int(column_list[5].string)
                valid_date = unicode(column_list[6].string)

                item_list.append(Item(item_id,
                                      name,
                                      file_size,
                                      page_size,
                                      page_numbers,
                                      valid_date,
                                      error_row))
            return item_list
        except Reload:
            if retry < MAX_RETRY:
                time.sleep(1)
                self.reload()
                return self.list(retry=retry + 1)
            else:
                raise

    def delete(self, *item_or_id):
        """
        delete a file on Netprint.

        Each argument is either an ``Item`` (its ``id`` is used) or an id
        value; duplicates are sent once.
        """
        id_set = set()
        for i in item_or_id:
            if isinstance(i, Item):
                id_set.add(i.id)
            else:
                id_set.add(i)

        self._request(self.url, body={
            'c': 0,  # unknown
            's': self.session_key,
            'fc': id_set,
            'delexec.x': 1,
            'delexec.y': 1})

    def send(self, path_or_file,
             file_name=None,
             paper_size=PaperSize.A4,
             color=Color.choice_at_printing,
             reserversion_number=ReservationNumber.AlphaNum,
             need_secret=NeedSecret.No,
             secret_code=None,
             need_margin=NeedMargin.No,
             need_notification=NeedNotification.No,
             mail_address=None):
        """
        send a file to Netprint.

        path_or_file -- path of the file to upload, or a file-like object
                        (as judged by ``is_file_like``).  A file opened
                        here from a path is closed again; a file-like
                        object passed in is left open for the caller.
        file_name    -- name to upload under; defaults to the base name of
                        the file's ``name``.
        secret_code  -- required when ``need_secret`` is NeedSecret.Yes.
        mail_address -- required when ``need_notification`` is
                        NeedNotification.Yes.
        The remaining options are passed through as form fields.

        Raises ValueError for an unusable ``path_or_file`` or an
        inconsistent combination of options.
        """
        opened_here = False
        if isinstance(path_or_file, basestring):
            # Binary mode: the content is uploaded byte for byte.
            f = open(path_or_file, 'rb')
            opened_here = True
        elif is_file_like(path_or_file):
            f = path_or_file
        else:
            raise ValueError("unknown value of path_or_file")

        try:
            if file_name:
                name = file_name
            else:
                name = f.name
            name = self.ensure_encoding(os.path.split(name)[-1])
            data = f.read()
        finally:
            # Only close what this method opened itself.
            if opened_here:
                f.close()

        # wrap to set the name.
        f = StringIO(data)
        f.name = name

        if paper_size == PaperSize.L and color != Color.color:
            raise ValueError("L size printing only accept color")
        if need_secret == NeedSecret.Yes and secret_code is None:
            raise ValueError("need secret_code")
        if need_notification == NeedNotification.Yes and mail_address is None:
            raise ValueError("need mail_address")

        sending_url = get_sending_target(f.name)
        self._request(sending_url, body=dict(
            s=self.session_key,
            c=0,  # unknown
            m=2,  # unknown
            re=1,  # unknown
            file1=f,
            papersize=paper_size,
            color=color,
            number=reserversion_number,
            secretcodesw=need_secret,
            secretcode=secret_code or '',
            duplextype=9,  # unknown
            magnification=need_margin,
            mailsw=need_notification,
            mailaddr=mail_address or ''))