Example #1
0
    def run_step(self, prev, params):
        """Build a flat HS6 table carrying ids and labels of its parent levels.

        Reads the ``hs2``, ``hs4``, ``hs6`` and ``chapter`` sheets from the
        workbook ``prev``. For every parent level an id column is derived on
        the hs6 table from a prefix of the zero-padded hs6 ``id``, and the
        per-language label columns are seeded with that id and then swapped
        for the label found in the matching level table.

        Args:
            prev: Workbook source handed to ``pd.read_excel``.
            params: Not used by this step.

        Returns:
            The hs6 DataFrame without ``trade_value``, with ``id`` renamed to
            ``hs6_id`` and ``chapter_id`` renamed to ``chapter``, sorted by
            ``hs6_id`` and de-duplicated on it.
        """
        # read data
        hs2 = pd.read_excel(prev, sheet_name='hs2')
        hs4 = pd.read_excel(prev, sheet_name='hs4')
        hs6 = pd.read_excel(prev, sheet_name='hs6')
        chapter = pd.read_excel(prev, sheet_name='chapter')

        # Number of leading characters of the 8-character padded hs6 id that
        # identify each parent level.
        prefix_len = {'hs2': 4,
                      'hs4': 6,
                      'chapter': 2}
        langs = ['es', 'en']

        # The padded id does not change inside the loop, so compute it once.
        padded_ids = hs6['id'].astype('str').str.zfill(8)
        for level, width in prefix_len.items():
            ids = padded_ids.str[:width].astype('int')
            hs6['{}_id'.format(level)] = ids
            for lan in langs:
                # Seed label columns with the parent id; they are replaced by
                # the real labels further down.
                hs6['{}_{}'.format(level, lan)] = ids
                hs6['{}_{}_short'.format(level, lan)] = ids

        tables = {'hs2': hs2,
                  'hs4': hs4,
                  'hs6': hs6,
                  'chapter': chapter}
        for level in list(tables):
            for lan in langs:
                target = '{}_{}_short'.format(level, lan)
                base = '{}_{}'.format(level, lan)
                # Keep the returned frame: binding it only to a loop variable
                # would drop the result whenever fill_values (defined
                # elsewhere) hands back a new object instead of mutating.
                tables[level] = fill_values(tables[level], target, base)
        hs6 = tables['hs6']

        for level, table in tables.items():
            for lan in langs:
                target = '{}_{}_short'.format(level, lan)
                base = '{}_{}'.format(level, lan)
                # Assign the result back instead of calling
                # ``hs6[col].replace(..., inplace=True)``: an in-place update
                # on a column selection is chained assignment and is not
                # propagated to hs6 under pandas Copy-on-Write.
                hs6[target] = hs6[target].replace(
                    dict(zip(table['id'], table[target])))
                hs6[base] = hs6[base].replace(
                    dict(zip(table['id'], table[base])))

        hs6 = hs6.drop(columns=['trade_value'])

        hs6 = hs6.rename(columns={'id': 'hs6_id',
                                  'chapter_id': 'chapter'})

        hs6 = hs6.sort_values(by='hs6_id')

        cols_es = ['chapter_es', 'chapter_es_short', 'hs2_es', 'hs2_es_short', 'hs4_es', 'hs4_es_short', 'hs6_es', 'hs6_es_short']
        cols_en = ['chapter_en', 'chapter_en_short', 'hs2_en', 'hs2_en_short', 'hs4_en', 'hs4_en_short', 'hs6_en', 'hs6_en_short']

        # Clean up the label text per language; format_text is defined
        # elsewhere and receives the language's stop-word list.
        nltk.download('stopwords')
        stopwords_es = nltk.corpus.stopwords.words('spanish')
        hs6 = format_text(hs6, cols_es, stopwords=stopwords_es)
        hs6 = format_text(hs6, cols_en, stopwords=stop_words.ENGLISH_STOP_WORDS)

        for col in ['hs6_id', 'hs4_id', 'hs2_id', 'chapter']:
            hs6[col] = hs6[col].astype('int')

        hs6 = hs6.drop_duplicates(subset='hs6_id')

        return hs6
Example #2
0
def __create_preview(review):
    """Return the preview text for a review entry.

    Yields an empty string when the entry has no (or an empty) ``review``
    item, a fixed spoiler notice when the review is flagged as containing
    spoilers, and otherwise the review's ``lbml`` passed through
    ``format_text`` with 400 as second argument.
    """
    body = review.get('review')
    if not body:
        return ''
    if body['containsSpoilers']:
        return '```This review may contain spoilers.```'
    # Concatenate onto an empty string so a non-string result still fails
    # the same way an accumulating ``+=`` would.
    return '' + format_text(body['lbml'], 400)
Example #3
0
 def post(self, pid):
     """Store a new reply for post ``pid`` from the submitted form.

     Reads ``reply[name]``, ``reply[email]``, ``reply[website]`` and
     ``reply[content]``. Name, email and content are mandatory: when one of
     them is empty the client is redirected back to the post with an error
     message and nothing is written. Otherwise the reply is saved with the
     next sequential number for the post and the client is redirected to
     that reply's anchor.
     """
     name = self.get_argument("reply[name]", default='')
     email = self.get_argument("reply[email]", default='')
     website = self.get_argument("reply[website]", default='')
     origin_content = self.get_argument("reply[content]", default='')

     required = (
         (name, u"请填入名字"),
         (email, u"请填入邮箱地址"),
         (origin_content, u"请输入评论内容"),
     )
     for value, message in required:
         if value == "":
             self.redirect("/post/%d" % int(pid), error=message)
             # Stop here: falling through would still insert the invalid
             # reply and attempt a second redirect on the same response.
             return

     # Render only once the input is known to be valid.
     content = format_text(md(format_text(origin_content)))
     # Replies are numbered per post, starting at 1.
     number = db.query(Reply).filter(Reply.pid == pid).count() + 1
     db.add(Reply(pid=int(pid), name=name, email=email, website=website,
         content=content, origin_content=origin_content, number=number))
     db.commit()
     self.set_replyer(name, email, website)
     self.redirect("/post/%d#%d" % (int(pid), int(number)))
Example #4
0
 def post(self, pid):
     """Create a reply on post ``pid`` from the posted form fields.

     The ``reply[name]``, ``reply[email]`` and ``reply[content]`` fields are
     required (``reply[website]`` is optional). A missing required field
     redirects back to the post with an error message and leaves the
     database untouched. On success the reply is stored with the next
     per-post number and the client is sent to that reply's anchor.
     """
     name = self.get_argument("reply[name]", default='')
     email = self.get_argument("reply[email]", default='')
     website = self.get_argument("reply[website]", default='')
     origin_content = self.get_argument("reply[content]", default='')

     required = (
         (name, u"请填入名字"),
         (email, u"请填入邮箱地址"),
         (origin_content, u"请输入评论内容"),
     )
     for value, message in required:
         if value == "":
             self.redirect("/post/%d" % int(pid), error=message)
             # Without this return the handler would go on to save the
             # invalid reply and try to redirect a second time.
             return

     # Rendering is deferred until validation has passed.
     content = format_text(md(format_text(origin_content)))
     # Next sequential reply number within this post.
     number = db.query(Reply).filter(Reply.pid == pid).count() + 1
     db.add(
         Reply(pid=int(pid),
               name=name,
               email=email,
               website=website,
               content=content,
               origin_content=origin_content,
               number=number))
     db.commit()
     self.set_replyer(name, email, website)
     self.redirect("/post/%d#%d" % (int(pid), int(number)))
Example #5
0
async def __get_infos(list_id):
    """Fetch a list and return ``(description, url, poster_url, name)``.

    The description is assembled from the owner's display name, the film
    count, the date part of ``whenPublished`` and, when present, the list's
    ``descriptionLbml`` passed through ``format_text`` with 300 as second
    argument.

    ``url`` is the first link of type ``letterboxd`` and ``poster_url`` the
    first poster size taller than 400 of the first preview entry. Either is
    ``None`` when no such item exists, so the function no longer fails with
    ``UnboundLocalError`` for lists without preview entries, without a
    large enough poster, or without a letterboxd link.
    """
    list_json = await api_call('list/{}'.format(list_id))

    url = next(
        (link['url'] for link in list_json['links']
         if link['type'] == 'letterboxd'),
        None,
    )

    description = 'By **' + list_json['owner']['displayName'] + '**\n'
    description += str(list_json['filmCount']) + ' films\nPublished '
    # Keep only the date part that precedes the 'T' separator.
    description += list_json['whenPublished'].split('T')[0].strip() + '\n'
    if list_json.get('descriptionLbml'):
        description += format_text(list_json['descriptionLbml'], 300)

    poster_url = None
    if list_json['previewEntries']:
        poster_json = list_json['previewEntries'][0]['film'].get('poster')
        if poster_json:
            poster_url = next(
                (size['url'] for size in poster_json['sizes']
                 if size['height'] > 400),
                None,
            )
    return description, url, poster_url, list_json['name']
Example #6
0
 def post(self, pid, id):
     """Update reply ``id`` of post ``pid`` from the submitted form.

     Calls ``check_admin`` first. Name, email and content are mandatory:
     when one of them is empty the client is redirected back to the post
     with an error message and the stored reply is left unchanged.

     Raises:
         tornado.web.HTTPError: 404 when no reply with ``id`` exists.
     """
     self.check_admin()
     name = self.get_argument("reply[name]", default='')
     email = self.get_argument("reply[email]", default='')
     website = self.get_argument("reply[website]", default='')
     origin_content = self.get_argument("reply[content]", default='')

     required = (
         (name, u"请填入名字"),
         (email, u"请填入邮箱地址"),
         (origin_content, u"请输入评论内容"),
     )
     for value, message in required:
         if value == "":
             self.redirect("/post/%d" % int(pid), error=message)
             # Stop here: falling through would overwrite the stored reply
             # with the invalid fields and attempt a second redirect.
             return

     # Render only once the input is known to be valid.
     content = md(format_text(origin_content))
     reply = db.query(Reply).get(id)
     if reply is None:
         raise tornado.web.HTTPError(404)
     reply.name = name
     reply.email = email
     reply.website = website
     reply.origin_content = origin_content
     reply.content = content
     db.commit()
     self.set_replyer(name, email, website)
     self.redirect("/post/%d#%d" % (int(pid), int(reply.number)))
Example #7
0
 def post(self, pid, id):
     """Edit the existing reply ``id`` belonging to post ``pid``.

     ``check_admin`` runs before anything else. The ``reply[name]``,
     ``reply[email]`` and ``reply[content]`` fields are required; if one is
     empty the client is redirected back to the post with an error message
     and no change is committed. On success the reply's fields are replaced
     and the client is sent to the reply's anchor.

     Raises:
         tornado.web.HTTPError: 404 when the reply cannot be found.
     """
     self.check_admin()
     name = self.get_argument("reply[name]", default='')
     email = self.get_argument("reply[email]", default='')
     website = self.get_argument("reply[website]", default='')
     origin_content = self.get_argument("reply[content]", default='')

     required = (
         (name, u"请填入名字"),
         (email, u"请填入邮箱地址"),
         (origin_content, u"请输入评论内容"),
     )
     for value, message in required:
         if value == "":
             self.redirect("/post/%d" % int(pid), error=message)
             # Without this return the handler would still blank out the
             # stored reply and try to redirect a second time.
             return

     # Rendering is deferred until validation has passed.
     content = md(format_text(origin_content))
     reply = db.query(Reply).get(id)
     if reply is None:
         raise tornado.web.HTTPError(404)
     reply.name = name
     reply.email = email
     reply.website = website
     reply.origin_content = origin_content
     reply.content = content
     db.commit()
     self.set_replyer(name, email, website)
     self.redirect("/post/%d#%d" % (int(pid), int(reply.number)))