def run_step(self, prev, params):
    """Build a flat HS6 product table with parent-level ids and names.

    Reads the ``hs2``, ``hs4``, ``hs6`` and ``chapter`` sheets of the Excel
    workbook ``prev``. For every HS6 row the parent ids are derived from the
    id zero-padded to 8 digits (first 2 digits -> chapter, 4 -> hs2,
    6 -> hs4), and the ``<level>_<lang>`` / ``<level>_<lang>_short`` columns
    are filled by looking those ids up in the matching sheet.

    Args:
        prev: Path or buffer of the workbook; passed to ``pd.read_excel``.
        params: Not used by this step.

    Returns:
        The ``hs6`` DataFrame, sorted by ``hs6_id`` with one row per
        ``hs6_id``, after the text columns went through ``format_text``.
    """
    # read data
    hs2 = pd.read_excel(prev, sheet_name='hs2')
    hs4 = pd.read_excel(prev, sheet_name='hs4')
    hs6 = pd.read_excel(prev, sheet_name='hs6')
    chapter = pd.read_excel(prev, sheet_name='chapter')

    langs = ['es', 'en']

    # Number of leading digits of the 8-digit padded id that identify each
    # parent level.
    prefix_lengths = {'hs2': 4, 'hs4': 6, 'chapter': 2}
    padded_ids = hs6['id'].astype('str').str.zfill(8)
    for level, width in prefix_lengths.items():
        ids = padded_ids.str[:width].astype('int')
        hs6['{}_id'.format(level)] = ids
        for lan in langs:
            # Seed the name columns with the parent id; the ids are swapped
            # for the actual names in the lookup loop below.
            hs6['{}_{}'.format(level, lan)] = ids
            hs6['{}_{}_short'.format(level, lan)] = ids

    tables = {'hs2': hs2, 'hs4': hs4, 'hs6': hs6, 'chapter': chapter}

    for level, table in tables.items():
        for lan in langs:
            target = '{}_{}_short'.format(level, lan)
            base = '{}_{}'.format(level, lan)
            # NOTE(review): the returned frame is only rebound locally, so the
            # lookup loop below relies on fill_values updating `table` in
            # place - confirm against fill_values.
            table = fill_values(table, target, base)

    for level, table in tables.items():
        for lan in langs:
            target = '{}_{}_short'.format(level, lan)
            base = '{}_{}'.format(level, lan)
            # Assign the result back instead of calling
            # `hs6[col].replace(..., inplace=True)`: with pandas Copy-on-Write
            # the chained in-place call only mutates a temporary Series and
            # leaves `hs6` untouched.
            hs6[target] = hs6[target].replace(
                dict(zip(table['id'], table[target])))
            hs6[base] = hs6[base].replace(
                dict(zip(table['id'], table[base])))

    hs6.drop(columns=['trade_value'], inplace=True)
    hs6.rename(columns={'id': 'hs6_id', 'chapter_id': 'chapter'}, inplace=True)
    hs6.sort_values(by='hs6_id', inplace=True)

    cols_es = ['chapter_es', 'chapter_es_short', 'hs2_es', 'hs2_es_short',
               'hs4_es', 'hs4_es_short', 'hs6_es', 'hs6_es_short']
    cols_en = ['chapter_en', 'chapter_en_short', 'hs2_en', 'hs2_en_short',
               'hs4_en', 'hs4_en_short', 'hs6_en', 'hs6_en_short']

    # Clean the text columns, using the stopword list of each language.
    nltk.download('stopwords')
    stopwords_es = nltk.corpus.stopwords.words('spanish')
    hs6 = format_text(hs6, cols_es, stopwords=stopwords_es)
    hs6 = format_text(hs6, cols_en, stopwords=stop_words.ENGLISH_STOP_WORDS)

    # codes ids
    for col in ['hs6_id', 'hs4_id', 'hs2_id', 'chapter']:
        hs6[col] = hs6[col].astype('int')

    hs6.drop_duplicates(subset='hs6_id', inplace=True)
    return hs6
def __create_preview(review):
    """Return the preview text for the review attached to an entry.

    Yields an empty string when ``review`` has no truthy ``'review'`` value,
    a fixed spoiler warning when the review is flagged ``containsSpoilers``,
    and otherwise the review's ``lbml`` passed through ``format_text`` with
    400 as second argument.
    """
    details = review.get('review')
    if not details:
        return ''
    if details['containsSpoilers']:
        return '```This review may contain spoilers.```'
    # Plain str concatenation: a non-string result from format_text raises.
    return '' + format_text(details['lbml'], 400)
def post(self, pid):
    """Store a submitted comment as a new reply to post ``pid``.

    Reads the ``reply[name]``, ``reply[email]``, ``reply[website]`` and
    ``reply[content]`` form arguments. When name, email or content is empty
    the request is redirected back to the post with an error message and
    nothing is stored. Otherwise a ``Reply`` numbered one past the current
    reply count of the post is committed, the commenter details are passed
    to ``set_replyer`` and the request is redirected to the new reply's
    anchor.

    Args:
        pid: Id of the post being replied to; must be convertible with
            ``int``.
    """
    name = self.get_argument("reply[name]", default='')
    email = self.get_argument("reply[email]", default='')
    website = self.get_argument("reply[website]", default='')
    origin_content = self.get_argument("reply[content]", default='')

    post_url = "/post/%d" % int(pid)

    # Return right after an error redirect: without it the handler kept
    # running, saved the invalid reply and attempted a second redirect.
    if name == "":
        self.redirect(post_url, error=u"请填入名字")
        return
    if email == "":
        self.redirect(post_url, error=u"请填入邮箱地址")
        return
    if origin_content == "":
        self.redirect(post_url, error=u"请输入评论内容")
        return

    # Render only once the input is known to be valid.
    content = format_text(md(format_text(origin_content)))

    number = db.query(Reply).filter(Reply.pid == pid).count() + 1
    db.add(Reply(pid=int(pid), name=name, email=email, website=website,
                 content=content, origin_content=origin_content,
                 number=number))
    db.commit()

    self.set_replyer(name, email, website)
    self.redirect("/post/%d#%d" % (int(pid), int(number)))
def post(self, pid):
    """Store a submitted comment as a new reply to post ``pid``.

    Reads the ``reply[name]``, ``reply[email]``, ``reply[website]`` and
    ``reply[content]`` form arguments. When name, email or content is empty
    the request is redirected back to the post with an error message and
    nothing is stored. Otherwise a ``Reply`` numbered one past the current
    reply count of the post is committed, the commenter details are passed
    to ``set_replyer`` and the request is redirected to the new reply's
    anchor.

    Args:
        pid: Id of the post being replied to; must be convertible with
            ``int``.
    """
    name = self.get_argument("reply[name]", default='')
    email = self.get_argument("reply[email]", default='')
    website = self.get_argument("reply[website]", default='')
    origin_content = self.get_argument("reply[content]", default='')

    post_url = "/post/%d" % int(pid)

    # Return right after an error redirect: without it the handler kept
    # running, saved the invalid reply and attempted a second redirect.
    if name == "":
        self.redirect(post_url, error=u"请填入名字")
        return
    if email == "":
        self.redirect(post_url, error=u"请填入邮箱地址")
        return
    if origin_content == "":
        self.redirect(post_url, error=u"请输入评论内容")
        return

    # Render only once the input is known to be valid.
    content = format_text(md(format_text(origin_content)))

    number = db.query(Reply).filter(Reply.pid == pid).count() + 1
    db.add(
        Reply(pid=int(pid), name=name, email=email, website=website,
              content=content, origin_content=origin_content, number=number))
    db.commit()

    self.set_replyer(name, email, website)
    self.redirect("/post/%d#%d" % (int(pid), int(number)))
async def __get_infos(list_id):
    """Fetch a list from the API and collect what is needed to display it.

    Args:
        list_id: Identifier appended to the ``list/`` API endpoint.

    Returns:
        A tuple ``(description, url, poster_url, name)``:
        ``description`` is the owner, film count, publication date and, when
        present, the formatted list description; ``url`` is the URL of the
        link typed ``'letterboxd'`` or ``None`` when there is none;
        ``poster_url`` is the first poster size taller than 400 of the first
        preview entry, or ``None`` when no such poster exists; ``name`` is
        the list name.
    """
    list_json = await api_call('list/{}'.format(list_id))

    # Default to None so a list without a 'letterboxd' link no longer fails
    # with UnboundLocalError at the return statement.
    url = None
    for link in list_json['links']:
        if link['type'] == 'letterboxd':
            url = link['url']
            break

    description = 'By **' + list_json['owner']['displayName'] + '**\n'
    description += str(list_json['filmCount']) + ' films\nPublished '
    # Keep only the date part of the timestamp (text before the 'T').
    description += list_json['whenPublished'].split('T')[0].strip() + '\n'
    if list_json.get('descriptionLbml'):
        description += format_text(list_json['descriptionLbml'], 300)

    # Same default for the poster: an empty preview, a film without poster,
    # or no size above 400 all yield None instead of an unbound name.
    poster_url = None
    if list_json['previewEntries']:
        poster_json = list_json['previewEntries'][0]['film'].get('poster')
        if poster_json:
            for poster in poster_json['sizes']:
                if poster['height'] > 400:
                    poster_url = poster['url']
                    break

    return description, url, poster_url, list_json['name']
def post(self, pid, id):
    """Update an existing reply of post ``pid`` from the submitted form.

    Calls ``check_admin`` first, then reads the ``reply[name]``,
    ``reply[email]``, ``reply[website]`` and ``reply[content]`` form
    arguments. When name, email or content is empty the request is
    redirected back to the post with an error message and the reply is left
    untouched. Otherwise the reply fetched by ``id`` is overwritten with the
    submitted values, committed, and the request is redirected to the
    reply's anchor.

    Args:
        pid: Id of the post the reply belongs to; must be convertible with
            ``int``.
        id: Primary key of the reply to update.

    Raises:
        tornado.web.HTTPError: 404 when no reply with ``id`` exists.
    """
    self.check_admin()

    name = self.get_argument("reply[name]", default='')
    email = self.get_argument("reply[email]", default='')
    website = self.get_argument("reply[website]", default='')
    origin_content = self.get_argument("reply[content]", default='')

    post_url = "/post/%d" % int(pid)

    # Return right after an error redirect: without it the handler kept
    # running, overwrote the reply with invalid data and attempted a second
    # redirect.
    if name == "":
        self.redirect(post_url, error=u"请填入名字")
        return
    if email == "":
        self.redirect(post_url, error=u"请填入邮箱地址")
        return
    if origin_content == "":
        self.redirect(post_url, error=u"请输入评论内容")
        return

    # Render only once the input is known to be valid.
    content = md(format_text(origin_content))

    reply = db.query(Reply).get(id)
    if reply is None:
        raise tornado.web.HTTPError(404)

    reply.name = name
    reply.email = email
    reply.website = website
    reply.origin_content = origin_content
    reply.content = content
    db.commit()

    self.set_replyer(name, email, website)
    self.redirect("/post/%d#%d" % (int(pid), int(reply.number)))