Python strip_tagsの例、utils.strip_tags Pythonの例

コード例 #1

0

ファイルを表示

ファイル: main.py プロジェクト: jefmud/flask_blog_1

 def snippet(self, length=100):
     """Return a tag-free excerpt of ``self.content``.

     The excerpt is capped at ``length`` characters (default 100), or at the
     length of the raw content when that is shorter.
     """
     limit = min(len(self.content), length)
     stripped = strip_tags(self.content)
     # The truncated text is passed through strip_tags a second time.
     return strip_tags(stripped[:limit])

コード例 #2

0

ファイルを表示

ファイル: app.py プロジェクト: jefmud/flask-micro-blog

def page_mod_shortcodes(page, shortcodes):
    """Apply shortcodes to a page and resolve its theme and template.

    Each shortcode has the configured tag markers removed and is split on
    whitespace. A shortcode of the form ``page <key> <slug>`` stores the
    content of the page with that slug under ``page[key]``. Any other
    shortcode with more than one word stores the text after its first word
    under that word, e.g. ``[[template sidebar-left.html]]`` sets
    ``page['template'] = 'sidebar-left.html'``.

    Args:
        page: mutable mapping describing the page; modified in place.
        shortcodes: sequence of raw shortcode strings (iterated twice).

    Returns:
        The same ``page`` object with shortcode keys applied, ``theme`` and
        ``template`` resolved, and the shortcodes removed from
        ``page['content']``.
    """
    shortcode_tag = app.config['SHORTCODE_TAG']
    shortcode_endtag = app.config['SHORTCODE_ENDTAG']

    for shortcode in shortcodes:
        # remove tag markers and split on whitespace
        scs = shortcode.replace(shortcode_tag, '').replace(shortcode_endtag, '')
        sclist = scs.split()
        if len(sclist) < 2:
            continue

        if sclist[0].lower() == "page":
            if len(sclist) < 3:
                # "page <key>" without a slug: nothing to look up, so skip it
                # instead of failing on the missing third word
                continue
            key = strip_tags(sclist[1].lower())
            slug = strip_tags(sclist[2].strip())
            cpage = g.db.pages.find_one({'slug': slug})
            if cpage is None:
                # NOTE(review): assumes find_one returns None for an unknown
                # slug (PyMongo behaviour) -- such shortcodes are ignored
                continue
            page[key] = cpage['content']
        else:
            # have to strip_tags because of multiline shortcode
            key = strip_tags(sclist[0]).strip()
            # exclude just the tag from the content
            value = scs[scs.find(key) + len(key):].strip()
            page[key] = value

    # ensure that a default page-template is set (if it was not set in the shortcodes)
    page['template'] = page.get('template', 'page.html')
    page['theme'] = page.get('theme', 'default')

    if page['theme'] in bootswatch_themes:
        # bootswatch_themes are default theme set
        # slight weakness in design that we are dependent on SiteMeta navbackground setting
        g.theme = page['theme']
        # was a no-op comparison (==); the templates come from the default theme
        page['theme'] = 'default'

    theme_path = os.path.join(BASE_DIR, 'templates', app.config['THEME_DIR'],
                              page['theme'])

    if not os.path.exists(theme_path) or page['theme'] == '':
        # in case user selected a non-existent theme
        page['theme'] = 'default'

    # note add some error trapping here,
    # if somehow there is a non-existent template, we should flash error and put them on default
    # page template.
    page['template'] = "{}/{}/{}".format(app.config['THEME_DIR'],
                                         page['theme'], page['template'])

    # remove shortcodes from display content
    for shortcode in shortcodes:
        page['content'] = page['content'].replace(shortcode, '')

    return page

コード例 #3

0

ファイルを表示

def getSurveyQuestions(surveyId, dataCenter, apiToken):
    """Download a survey's question definitions and index them by question ID.

    Issues a GET request to the survey-definitions endpoint of the given data
    center, then builds one summary dict per question holding its 1-based
    list position, tag-stripped text, description and type, and its choices
    (the string "none" when the question has no "Choices" entry).

    Returns:
        dict mapping each ``QuestionID`` to its summary dict.
    """
    print("GETTING SURVEY QUESTIONS...\n")

    # Step 1: endpoint and headers for the API call
    questionRequestUrl = "https://{0}.qualtrics.com/API/v3/survey-definitions/{1}/questions/".format(
        dataCenter, surveyId)
    headers = {
        "content-type": "application/json",
        "x-api-token": apiToken,
    }

    # Step 2: perform the request and pull the question list out of the JSON
    response = requests.request("GET",
                                questionRequestUrl,
                                headers=headers,
                                stream=True)
    elements = json.loads(response.content)['result']['elements']

    # Step 3: summarize every question and track the distinct question types
    questionData = {}
    seenTypes = []
    for position, question in enumerate(elements, start=1):
        choices = question['Choices'] if "Choices" in question else "none"

        questionData[question['QuestionID']] = {
            "ListPosition": position,
            "QuestionText": strip_tags(question['QuestionText']),
            "QuestionDescription": strip_tags(question['QuestionDescription']),
            "QuestionType": strip_tags(question['QuestionType']),
            "QuestionChoices": choices
        }

        questionType = question['QuestionType']
        if questionType not in seenTypes:
            seenTypes.append(questionType)

    # overview of what was loaded
    print("THERE ARE " + str(len(questionData)) + " QUESTIONS LOADED..\n")
    print("THERE ARE " + str(len(seenTypes)) + " QUESTION TYPES: \n")

    return questionData

コード例 #4

0

ファイルを表示

ファイル: mp4.py プロジェクト: michalkalita72/ECE_448

def main(args):
    """Load the train/test sets, run each tagger, and print its accuracy report."""
    print("Loading dataset...")
    train_set = utils.load_dataset(args.training_file)
    print("FILE: ", args.training_file)
    test_set = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # baseline and viterbi_p1 are currently left out of the run
    taggers = (('Viterbi_p2', viterbi_p2), ('extra', extra))
    for name, algorithm in taggers:
        print("Running {}...".format(name))
        testtag_predictions = algorithm(train_set, utils.strip_tags(test_set))

        overall = utils.evaluate_accuracies(test_set, testtag_predictions)
        baseline_acc, correct_wordtagcounter, wrong_wordtagcounter = overall
        multitags_acc, unseen_acc = utils.specialword_accuracies(
            train_set, test_set, testtag_predictions)

        print("Accuracy: {:.2f}%".format(baseline_acc * 100))
        top_wrong = utils.topk_wordtagcounter(wrong_wordtagcounter, k=4)
        print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))
        top_correct = utils.topk_wordtagcounter(correct_wordtagcounter, k=4)
        print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))
        print("\tMultitags Accuracy: {:.2f}%".format(multitags_acc * 100))
        print("\tUnseen words Accuracy: {:.2f}%".format(unseen_acc * 100))
        print()

コード例 #5

0

ファイルを表示

ファイル: pipelines.py プロジェクト: omidmt/crawler

    def process_item(self, article, spider):
        """Reduce the article to plain summary text and fingerprint its URL.

        ``article['text']`` is replaced by the tag-stripped result of
        ``Document(...).summary()`` and ``article['hash']`` is set to the
        SHA-256 hex digest of ``article['url']``.

        Args:
            article: mapping with at least ``'text'`` and ``'url'`` entries;
                it is modified in place.
            spider: unused here.

        Returns:
            The same ``article`` object.
        """
        doc = Document(article['text'])
        article['text'] = strip_tags(doc.summary())

        # hashlib only accepts bytes: encode text URLs explicitly so hashing
        # works on Python 3 and for non-ASCII URLs instead of raising.
        url = article['url']
        if not isinstance(url, bytes):
            url = url.encode('utf-8')
        article['hash'] = hashlib.sha256(url).hexdigest()

        return article

コード例 #6

0

ファイルを表示

ファイル: main.py プロジェクト: Xander354/flask-blog

 def snippet(self):
   """Return the tag-stripped content, cut to at most 250 characters."""
   # A slice stops at the end of a shorter string, so no length check is needed.
   return strip_tags(self.content)[:250]

コード例 #7

0

ファイルを表示

ファイル: pipelines.py プロジェクト: omidmt/crawler

 def process_item(self, article, spider):
     """Reduce the article to plain summary text and fingerprint its URL.

     ``article['text']`` is replaced by the tag-stripped result of
     ``Document(...).summary()`` and ``article['hash']`` is set to the
     SHA-256 hex digest of ``article['url']``.

     Args:
         article: mapping with at least ``'text'`` and ``'url'`` entries;
             it is modified in place.
         spider: unused here.

     Returns:
         The same ``article`` object.
     """
     doc = Document(article['text'])
     article['text'] = strip_tags(doc.summary())

     # hashlib only accepts bytes: encode text URLs explicitly so hashing
     # works on Python 3 and for non-ASCII URLs instead of raising.
     url = article['url']
     if not isinstance(url, bytes):
         url = url.encode('utf-8')
     article['hash'] = hashlib.sha256(url).hexdigest()

     return article

コード例 #8

0

ファイルを表示

def extract_google_query_results(html):
    """Parse a Google results page into a list of ``QueryResult`` objects.

    Every ``<div class="g">`` is treated as one result. Its first ``<a>``
    supplies the link and title, and its ``<span class="st">`` supplies the
    description. When the plain ``href`` is missing or not an ``http`` URL,
    ``data-href`` is tried, then the ``/url?q=<target>&sa=...`` redirect form
    is unwrapped.

    Args:
        html: markup of the results page.

    Returns:
        list of ``QueryResult(title, href, description)``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # result block from google
    result_blocks = soup.find_all("div", class_="g")
    query_results = []
    redirect_prefix = "/url?q="

    for result_block in result_blocks:
        a = result_block.find("a")
        em = result_block.find("span", "st")

        # NOTE(review): extract_info presumably calls the lambda right away and
        # yields a falsy value on failure -- confirm against its definition
        href = extract_info(lambda: a['href'])
        title = extract_info(lambda: "".join(list(a.strings)))

        if not href or not href.startswith("http"):
            try:
                href = a['data-href']
            except Exception:
                href = a['href']
                if href.startswith("/url"):
                    # str.strip() removes a *set of characters* from both ends
                    # and could eat the tail of the target URL; drop only the
                    # literal redirect prefix instead.
                    if href.startswith(redirect_prefix):
                        href = href[len(redirect_prefix):]
                    i = href.find("&sa")
                    if i != -1:
                        href = href[:i]

        description = extract_info(
            lambda: strip_tags("".join(list(em.strings))))

        query_results.append(
            QueryResult(unicode(title), unicode(href), unicode(description)))

    return query_results

コード例 #9

0

ファイルを表示

def main(args):
    """Run the tagger selected by ``args.algorithm`` and print its accuracy report."""
    print("Loading dataset...")
    train_set = utils.load_dataset(args.training_file)
    test_set = utils.load_dataset(args.test_file)
    print("Loaded dataset")
    print()

    # dispatch table from command-line name to tagging function
    taggers = {
        "baseline": baseline,
        "viterbi_1": viterbi_1,
        "viterbi_2": viterbi_2,
        "viterbi_ec": viterbi_ec
    }
    tagger = taggers[args.algorithm]

    print("Running {}...".format(args.algorithm))
    testtag_predictions = tagger(train_set, utils.strip_tags(test_set))

    overall = utils.evaluate_accuracies(testtag_predictions, test_set)
    baseline_acc, correct_wordtagcounter, wrong_wordtagcounter = overall
    multitags_acc, unseen_acc = utils.specialword_accuracies(
        train_set, testtag_predictions, test_set)

    print("Accuracy: {:.2f}%".format(baseline_acc * 100))
    print("\tMultitags Accuracy: {:.2f}%".format(multitags_acc * 100))
    print("\tUnseen words Accuracy: {:.2f}%".format(unseen_acc * 100))
    top_wrong = utils.topk_wordtagcounter(wrong_wordtagcounter, k=4)
    print("\tTop K Wrong Word-Tag Predictions: {}".format(top_wrong))
    top_correct = utils.topk_wordtagcounter(correct_wordtagcounter, k=4)
    print("\tTop K Correct Word-Tag Predictions: {}".format(top_correct))

    print()

コード例 #10

0

ファイルを表示

ファイル: snoc.py プロジェクト: Web5design/twittermap

 def process_feed_item(current):
     """Return the tag-stripped, trimmed text of a feed entry, or None.

     The text is the entry's 'content' value, falling back to 'summary' only
     when the 'content' key is absent. A list is reduced to its first element
     and a dict to its 'value' entry before tags are stripped.
     """
     fallback = current.get('summary', None)
     payload = current.get('content', fallback)
     if payload is None:
         return None
     if isinstance(payload, list):
         payload = payload[0]
     if isinstance(payload, dict):
         payload = payload['value']
     cleaned = utils.strip_tags(payload)
     return cleaned.strip()

コード例 #11

0

ファイルを表示

ファイル: snoc.py プロジェクト: imclab/twittermap

 def process_feed_item(current):
     """Extract plain text from a feed entry.

     Uses the entry's 'content' (or 'summary' when 'content' is absent),
     unwraps a list to its first element and a dict to its 'value', then
     strips tags and surrounding whitespace. Returns None when no text is
     available.
     """
     summary = current.get('summary', None)
     raw = current.get('content', summary)
     if raw is None:
         return None
     if isinstance(raw, list):
         raw = raw[0]
     if isinstance(raw, dict):
         raw = raw['value']
     without_tags = utils.strip_tags(raw)
     return without_tags.strip()

コード例 #12

0

ファイルを表示

def _book_markdown(book):
    """Build the Markdown card for a book and strip tags from the result.

    The card has the title in emphasis, the comma-joined authors, the first
    200 characters of the description followed by "... " (only when a
    description is present), and a link to the book's page.
    """
    pieces = [f"*{book['title']}* \n", f"{', '.join(book['authors'])}\n\n"]

    description = book.get('description')
    if description:
        pieces.append(f"{description[:200]}... ")

    pieces.append(f"[На сайте 🌎]({book['link']})\n")

    return strip_tags("".join(pieces))

コード例 #13

0

ファイルを表示

ファイル: blog.py プロジェクト: elidupree/eliduprees-website-source

def post_html(contents, title, permalink, taglist, stream_only, metadata, scrutinize = True, allow_comments = True, Patreon_type = "blog"):
  """Render one post as HTML and collect the <head> markup it needs.

  The post text is first run through
  blog_server_shared.postprocess_post_string. Each "<transcript ...>" marker
  is then expanded into a show/hide transcript block, the "<cut>" marker is
  applied or removed, a readability note is optionally appended, and the
  result is split on "<bigbreak>" into wrapped sections.

  Parameters:
    contents: raw post text.
    title: post title; when truthy it becomes an <h1> link to permalink and
      the source of the wrapper's id attribute.
    permalink: URL used for the title link, the "Continue reading" link and
      for rewriting fragment-only hrefs in stream mode.
    taglist, metadata, allow_comments, Patreon_type: forwarded to
      metadata_and_comments_section_html; metadata["id"] is also used to
      build element ids.
    stream_only: when True, everything from "<cut>" onwards is replaced by a
      "Continue reading" link and no readability note is computed.
    scrutinize: forwarded to postprocess_post_string.

  Returns:
    A 2-tuple (post_html_string, head_html_string).
  """
  head = []
  # only the first element of the postprocessing result is used
  post_content = blog_server_shared.postprocess_post_string(contents, metadata["id"], title, False, scrutinize)[0]
  
  head.append ("<script>window.elidupree.handle_content_warnings ('"+ metadata ["id"]+"', false)</script>" )

  # Expand transcript markers one at a time; each replacement removes the
  # marker it matched, and the loop ends when re.search finds none left.
  next_transcript_number = 1
  while True:
    transcript_generator = re.search(r"<transcript"+ blog_server_shared.grouped_string_regex("transcript_text")+">", post_content, re.DOTALL)
    if transcript_generator is None:
      break
    # "<n>_<post id>" is used in the ids and classes of this transcript
    transcript_identifier_string = str(next_transcript_number)+'_'+ metadata ["id"]
    # splice the transcript block in place of the matched marker
    post_content = post_content [0: transcript_generator.start(0)]+'<div id="transcript_'+ transcript_identifier_string+'" class="transcript_block"><div class="transcript_header">Transcript: <a id="show_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(show)</a><a id="hide_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(hide)</a></div><div class="transcript_content id'+ transcript_identifier_string+'">'+ transcript_generator.group("transcript_text")+'</div></div>' + post_content [transcript_generator.end(0):]
    # per-transcript CSS for the hidden state plus the script call that wires it up
    head.append('''<style> 
html.transcript_hidden_'''+ transcript_identifier_string +''' div.transcript_content.id'''+ transcript_identifier_string +''' {display: none;}
#show_transcript_button_'''+ transcript_identifier_string +''' {display: none;}
html.transcript_hidden_'''+ transcript_identifier_string +''' #show_transcript_button_'''+ transcript_identifier_string +''' {display: inline;}
html.transcript_hidden_'''+ transcript_identifier_string +''' #hide_transcript_button_'''+ transcript_identifier_string +''' {display: none;}
    </style> 
    <script>
    window.elidupree.handle_transcript ("'''+ transcript_identifier_string +'''");
    </script>''')
    next_transcript_number = next_transcript_number + 1

  if stream_only == True:
    # replace everything from "<cut>" to the end of the post with "[...]"
    # and a link to the full post
    cutter = re. compile ( r"<cut>.*?</p>.*$", re.DOTALL)
    post_content = cutter.sub ('''[...]</p>
<a class="continue_reading" href="'''+ permalink +'''">Continue reading<span class="invisible"> '''+ title +'''</span>...</a>''', post_content)
    #this sometimes cuts off anchors, so make sure fragments point at the canonical URL
    post_content = re.sub ('href="#','href="' + permalink + '#', post_content)
  else:
    # full view: the cut marker is simply dropped
    post_content = re.sub ("<cut>", "", post_content)
  
  calculate_readability = (stream_only != True)
  if calculate_readability:
    #using the automated readability index
    # measured on the tag-stripped, entity-unescaped, whitespace-collapsed text
    reference = re.sub(r"\s+", " ", html.unescape (utils.strip_tags (post_content)))
    sentences = len(re.findall (r"\w\w\w.*?[.?!]", reference))
    words = utils.word_count (reference)
    characters = len(re.findall (r"\w", reference))
    # skipped when either count is zero, which also avoids dividing by zero
    if words >0 and sentences >0:
      readability = 4.71*characters/words +0.5 *words/sentences -21.43
      post_content = post_content + '<em class="debug"> Approximate readability: ' + "{:.2f}".format (readability) + " ("+ str (characters) + " characters, " + str (words) +  " words, " + str (sentences)  + " sentences, " + "{:.2f}".format (characters/words) + " characters per word, " + "{:.2f}".format (words/sentences) + " words per sentence)</em>"
  
  # each "<bigbreak>"-separated part gets its own wrapper div
  post_content_sections = post_content.split("<bigbreak>")
  id_str = ''
  if title:
    id_str = 'id="'+utils.format_for_url(title)+'"'
    # the title heading is prepended to the first section only
    post_content_sections[0] = '<h1><a class="post_title_link" href="'''+permalink+'">'+title+'</a></h1>'+post_content_sections[0]
  for i in range(0, len(post_content_sections)):
    post_content_sections[i] = '<div class="post_content_section">'+post_content_sections[i]+'</div>'
  # result: (wrapped post followed by the metadata/comments section, head markup)
  return ('''
<div '''+id_str+''' class="blog_post">
  '''+(''.join(post_content_sections))+'''
</div>'''+metadata_and_comments_section_html(title, permalink, taglist, stream_only, metadata, allow_comments = allow_comments, Patreon_type = Patreon_type), "".join (head))

コード例 #14

0

ファイルを表示

ファイル: urls.py プロジェクト: jtrain/nmt

def league(league):
    """Register the submitted team for this league and remember it in a cookie.

    Redirects back to the league page in every case. A request without a
    "team" parameter is redirected without registering anything.
    """
    league_path = '/%s' % league

    team_name = request.params.get("team")
    if not team_name:
        # a malformed POST - didn't select a team.
        return redirect(league_path)

    team_name = strip_tags(team_name)
    new_user(team_name.decode('utf-8'), conn)

    # the cookie lives for one year and is scoped to this league's path
    one_year = 3600*24*365
    response.set_cookie(league, cookie_safe(team_name),
                        max_age=one_year, path=league_path)
    redirect(league_path)

コード例 #15

0

ファイルを表示

ファイル: mm-randbot.py プロジェクト: arvego/mm-randbot

def get_random_user(message):
    """Reply with a link to a randomly ordered member of the current chat.

    In a private chat a fixed reply is sent instead. When the query yields
    no members, nothing is sent.
    """
    if message.chat.type == 'private':
        my_bot.reply_to(message, "КТО ВЫ ТО? Я ТУТ ОДИН!")
        return

    chat_id = message.chat.id
    members = User.select().where(User.chat_id == chat_id, User.is_member).order_by(fn.Random())
    if not members:
        return  # TODO: log that we do not have user base

    winner = members[0]
    reply = 'Вам выпал: <a href="tg://user?id={}">{}</a>'.format(
        winner.user_id, strip_tags(winner.first_name))
    my_bot.reply_to(message, reply, parse_mode='HTML')

コード例 #16

0

ファイルを表示

ファイル: urls.py プロジェクト: vu-au/nmt

def league(league):
    """Store the chosen team for a league, set the league cookie, and redirect.

    When no "team" parameter was submitted the request is only redirected
    back to the league page.
    """
    target = '/%s' % league

    submitted = request.params.get("team")
    if not submitted:
        # a malformed POST - didn't select a team.
        return redirect(target)

    clean_team = strip_tags(submitted)
    new_user(clean_team.decode('utf-8'), conn)

    # one-year cookie, valid only under this league's path
    cookie_options = {'max_age': 3600 * 24 * 365, 'path': target}
    response.set_cookie(league, cookie_safe(clean_team), **cookie_options)
    redirect(target)

コード例 #17

0

ファイルを表示

ファイル: builder.py プロジェクト: cameronmaske/cameronmaske.com

def recent_feed():
    """Build the 'Recent Articles' Atom feed from pages whose meta has 'published'."""
    feed = AtomFeed('Recent Articles',
                    feed_url=request.url, url=request.url_root)

    for article in pages:
        # pages without a 'published' entry in their meta are left out
        if 'published' not in article.meta:
            continue
        entry_title = strip_tags(article['title'])
        entry_body = unicode(article.html)
        feed.add(entry_title, entry_body,
                 content_type='html',
                 url=article.path,
                 author='Cameron Maske',
                 published=article['date'],
                 updated=article['date'])

    return feed.get_response()

コード例 #18

0

ファイルを表示

ファイル: pinned.py プロジェクト: tabasku/harbour-neliapila

def add_pin(post_no,board,short_com,thumb_url,time_created,replies_count):
    """Store a pinned post with a cleaned, shortened comment.

    Tags are stripped when the comment contains "<", the comment is cut to
    100 characters, single quotes become "&#039;", and the encoded result is
    saved together with the current timestamp.
    """
    timestamp = int(time.time())
    database = storage.Storage()

    # a "<" is taken as a hint that the comment contains markup
    if "<" in short_com:
        short_com = strip_tags(short_com)

    # slicing leaves shorter comments untouched
    shortened = short_com[:100]
    encoded = shortened.replace("'","&#039;").encode()

    database.add_pin(post_no,board,encoded,thumb_url,timestamp,time_created,replies_count)

コード例 #19

0

ファイルを表示

def get_random_user(message):
    """Pick a member of the group chat in random order and mention them.

    Private chats get a fixed reply. An empty member query sends nothing.
    """
    in_private_chat = message.chat.type == 'private'
    if in_private_chat:
        my_bot.reply_to(message, "КТО ВЫ ТО? Я ТУТ ОДИН!")
        return

    candidates = (User.select()
                  .where(User.chat_id == message.chat.id, User.is_member)
                  .order_by(fn.Random()))
    if candidates:
        chosen = candidates[0]
        mention = 'Вам выпал: <a href="tg://user?id={}">{}</a>'.format(
            chosen.user_id, strip_tags(chosen.first_name))
        my_bot.reply_to(message, mention, parse_mode='HTML')
    # TODO: log that we do not have user base (when candidates is empty)

コード例 #20

0

ファイルを表示

 def search(self, pattern):
     """Find the first match of ``pattern`` on each line of the document body.

     The ``<body>`` element is pretty-printed and split into lines. Each line
     has its tags stripped and is then searched with the compiled pattern.

     Args:
         pattern: regular expression source passed to ``re.compile``.

     Returns:
         list of ``FileParser.MatchObj(line, start, end)``, one per matching
         line. Empty when the body cannot be rendered, e.g. when there is no
         ``<body>`` and ``find`` returned None.
     """
     body = self.soup.find('body')
     try:
         lines = body.prettify().split('\n')
     except Exception:
         # Best effort: an unrenderable or missing body yields no matches.
         # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
         # propagate.
         return []

     regex = re.compile(pattern)
     matches = []
     for line in lines:
         text = utils.strip_tags(line)
         m = regex.search(text)
         if m:
             # m.end() equals start + len(m.group(0))
             matches.append(FileParser.MatchObj(text, m.start(), m.end()))
     return matches

コード例 #21

0

ファイルを表示

def inlinequery(update, context):
    """Answer an inline query with one page of book search results.

    The page number comes from the query offset (1 when empty). On AuthError
    a single article asking the user to authorize is returned together with
    a switch-to-private-chat button. Otherwise each book becomes an article
    with an "add book" button, and the next offset is set to the next page.
    """
    inline_query = update.inline_query
    query = inline_query.query
    user_id = inline_query.from_user.id
    page = int(inline_query.offset or 1)

    logger.info(f"query: {query}, page: {page}")

    try:
        books = goodreads_api.get_search_books(user_id,
                                               query,
                                               page=page,
                                               per_page=20)
    except AuthError:
        logger.error(f"AuthError: user_id {user_id}")
        login_prompt = InlineQueryResultArticle(
            id=uuid4(),
            title="Запустите бота!",
            description="Для использования бота, нажмите на кнопку выше, и авторизуйтесь в Goodreads",
            input_message_content=InputTextMessageContent("None"),
        )
        return inline_query.answer([login_prompt],
                                   cache_time=0,
                                   switch_pm_text="Добавить бота",
                                   switch_pm_parameter="f")

    articles = []
    for book in books:
        # message body sent when the user picks this result
        book_md = (
            f"*{strip_tags(book['title'])}* \n"
            f"{', '.join(book['authors'])}\n"
            f"[На сайте 🌎](https://www.goodreads.com/book/show/{book['id']})")

        add_book_button = InlineKeyboardButton(
            "Добавить книгу 📚", callback_data=f"inlinebook {book['id']}")
        keyboard = [[add_book_button]]

        article = InlineQueryResultArticle(
            id=uuid4(),
            title=strip_tags(book["title"]),
            thumb_url=book["image_url"],
            description=f"{', '.join(book['authors'])}",
            input_message_content=InputTextMessageContent(
                book_md, ParseMode.MARKDOWN),
            reply_markup=InlineKeyboardMarkup(keyboard))
        articles.append(article)

    inline_query.answer(articles, next_offset=page + 1)

コード例 #22

0

ファイルを表示

ファイル: bot.py プロジェクト: Oxinhagg/telegram-bookshelf-bot

def book(update, context):
    """Reply with the Markdown card and shelf buttons for the requested book.

    The book id is the part of the message text after the first underscore.
    On AuthError the error text is sent to the user instead.
    """
    message = update.message
    user_id = message.from_user.id
    book_id = message.text.split('_')[1]

    logger.info(f"user_id: {user_id}, book_id:{book_id}")

    try:
        details = goodreads_api.get_book(user_id, book_id)
    except AuthError as ex:
        logger.error(f"AuthError: user_id {user_id}")
        return context.bot.send_message(user_id, text=str(ex))

    markup = _book_buttons(details.get('shelf'), book_id, user_id)
    card = strip_tags(details['markdown'])

    message.reply_text(text=card,
                       parse_mode=ParseMode.MARKDOWN,
                       reply_markup=markup)

コード例 #23

0

ファイルを表示

def make_page(title, head_stuff, body_stuff, extras = {}):
  jQuery ='''<script type="text/javascript" src="/media/jquery-3.0.0.min.js?rr"></script>'''
  jQuery_before = ""
  jQuery_after = ""
  if "jQuery_before" in extras:
    jQuery_before = jQuery
  else:
    jQuery_after = jQuery
  
  unbranded_title = re.sub(r"\s*⊂.*", "", title)
  image = (extras ["blurb_image"] if "blurb_image" in extras else "/media/colorful-background.jpg?rr")
  cooperation_stuff = ['''
    <meta name="twitter:card" content="summary">
    <meta name="twitter:site" content="@EliDupree">
    <meta property="og:site_name" content="Eli Dupree's website">
    <meta property="og:title" content="'''+ title +'''">
    <!--<link rel="image_src" href="'''+ image +'''">-->
    <meta property="og:image" content="'''+ utils.canonical_scheme_and_domain + image +'''">
    ''']
  if "blurb" in extras:
    blurb = html.escape (utils.strip_tags (extras ["blurb"]))
    cooperation_stuff.append ('''
      <meta name="description" content="'''+ blurb +'''">
      <meta property="og:description" content="'''+ blurb +'''">
      ''')
  
  return '''<!DOCTYPE html>
<html lang="en" class="javascript_disabled'''+(' '+extras["html_class"] if "html_class" in extras else '')+'''">
  <head>
    <meta charset="utf-8" />
    <title>'''+title+'''</title>
    <link rel="shortcut icon" href="/favicon.ico" />
    <link rel="stylesheet" type="text/css" href="'''+css.domain_relative_url()+'''?rr">
    <link rel="alternate" type="application/atom+xml" href="/atom.xml" title="RSS (Atom) feed" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
    '''+ "".join (cooperation_stuff) +'''
    <script type="text/javascript" src="/before-body.js?rr"></script>
    '''+ jQuery_before +'''
    '''+head_stuff+'''
  </head>
  <body>
    '''+body_stuff+'''
    '''+ jQuery_after +'''
    <script type="text/javascript" src="/after-body.js?rr"></script>
    '''+ (extras ["after_body"] if "after_body" in extras else "") +'''

コード例 #24

0

ファイルを表示

ファイル: mpx_pyes_index.py プロジェクト: gvenkataraman/experiments

def index(fname, index_name, keys_to_tag):
    fptr = open(fname, 'rb')
    line_count = 0
    conn = ES(["localhost:9200"])
    if not conn.exists_index(index_name):
        conn.create_index(index_name)
    start = time.clock()
    numb_exceptions = 0

    for line in fptr:
        if ((line_count % 10000) == 0):
            end = time.clock()
            minutes = (end - start) / 60.0
            print 'File: %s Done with %d took %f min. ' %(fname, line_count, minutes)
            print 'number of exceptions ', numb_exceptions
        line_count += 1
        data = json.loads(line)
        if not data.get('tags'):
            continue
        post_id = int(data['post_id'])
        found_content = False
        for k in keys_to_tag:
            if data.get(k):
                found_content = True
        if not found_content:
            continue
        index_data = dict()
        for k in keys_to_tag:
            value = data.get(k)
            if (value and (k == 'content')):
                try:
                    stripped_value = utils.strip_tags(value)
                except Exception:
                    stripped_value = value
                index_data[k] = stripped_value
        if post_id and data:
            try:
                conn.index(index_data, index_name, "test-type", post_id)
            except Exception:
                numb_exceptions += 1
                continue

    print 'number of exceptions ', numb_exceptions

コード例 #25

0

ファイルを表示

ファイル: migrate.py プロジェクト: mattotodd/bloggart27

 def migrate_one_comment(self, thread_id, comment_key, replies, parent_id=None):
   """Post one stored comment to the forum and defer migration of its replies.

   The comment is loaded by key and sent through a 'create_post' request.
   The reply paths are then grouped by their first element, and each group is
   scheduled as a deferred call of this method with the new post id as parent.
   """
   comment = BloogBreakingMigration.Comment.get(comment_key)
   post_args = dict(
       request_type=urlfetch.POST,
       thread_id=thread_id,
       message=utils.strip_tags(comment.body).encode('utf-8'),
       author_name=comment.name.encode('utf-8') if comment.name else 'Someone',
       author_email=comment.email.encode('utf-8') if comment.email else '*****@*****.**',
       forum_api_key=self.forum_key,
       created_at=comment.published.strftime('%Y-%m-%dT%H:%M'),
   )
   # optional fields are only sent when they have a value
   if comment.homepage:
     post_args['author_url'] = comment.homepage.encode('utf-8')
   if parent_id:
     post_args['parent_post'] = parent_id

   created = disqus_request('create_post', **post_args)
   post_id = created['message']['id']

   for child_id, group in itertools.groupby(replies, lambda x:x[0]):
     child_key = db.Key.from_path('Comment', child_id, parent=comment_key)
     # drop the leading id from each path and discard paths that end here
     remaining = [x[1:] for x in group if x[1:]]
     deferred.defer(self.migrate_one_comment, thread_id, child_key,
                    remaining, post_id)

コード例 #26

0

ファイルを表示

ファイル: lanit.py プロジェクト: mikeful/lanit-at-event-listing

def handle_remove_event():
    """
    Check the submitted short name and secret key, remove the event, then
    flash the outcome and redirect to the event list
    """
    def back_to_list(notice):
        # every exit path flashes a message and returns to the event list
        flash(notice)
        return redirect(url_for('handle_list_events'))

    mismatch = u'Lyhytnimi tai salainen avain ei täsmää. Tarkista tiedot ja yritä uudelleen.'

    # TODO time-based throttle to prevent brute-force spamming of keys
    if request.method == 'GET':
        return back_to_list(u'Käytä lomaketta')

    # Get data from form
    form = request.form
    remove_short_name = form['short_name'].strip().lower()
    remove_secret = form['secret'].strip().lower()
    ayah_session_secret = strip_tags(form['session_secret'].strip())

    # Setup Are You A Human check
    ayah.configure(app.config['ARE_YOU_HUMAN_PUBLISHER_KEY'], app.config['ARE_YOU_HUMAN_SCORING_KEY'])

    # Validation: all three failures report the same message, so a caller
    # cannot tell which check rejected the request
    rejected = (
        is_valid_short_name(remove_short_name) != True
        or is_valid_secret_key(remove_secret, app.config['SECRET_KEY_CHARACTERS']) != True
        or event_exists(remove_short_name) != True
    )
    if rejected:
        return back_to_list(mismatch)

    # Call Are You A Human scoring service
    if ayah.score_result(ayah_session_secret) == False:
        return back_to_list(u'Spämmitarkistus epäonnistui. Tarkista että et ole spämmirobotti ja/tai yritä uudelleen.')

    if remove_event_with_secret(remove_short_name, remove_secret):
        return back_to_list(u'Tapahtuma poistettu onnistuneesti.')
    return back_to_list(mismatch)

コード例 #27

0

ファイルを表示

ファイル: icon.py プロジェクト: tualatrix/lazy-reader

    def on_left_clicked(self):
        """Show the current reader entry in the notification and advance.

        Nothing is shown unless ``notify.closed`` is True. After showing, the
        entry is marked as read and the reader moves to the next entry.
        """
        entry = reader.get_entry()
        title = entry['title']
        body = entry['summary']
        if body is None:
            # fall back to the full content when there is no summary
            body = entry['content']
        written_on = datetime.date.fromtimestamp(entry['updated']).isoformat()
        author = entry['author']

        if notify.closed is not True:
            return

        text = '<b>%s</b> written at <i>%s</i>\n\n%s' % (author, written_on, strip_tags(body))
        notify.update(title, text)
        notify.attach_to_widget(self)

        # disabled "read more" icon experiment:
        #        read_more_button = gtk.Button()
        #        icon = read_more_button.render_icon(gtk.STOCK_DIALOG_INFO, gtk.ICON_SIZE_DIALOG)
        #        n.set_icon_from_pixbuf(icon)

        notify.closed = False
        notify.show()
        reader.set_read(entry)
        reader.iter_next()

コード例 #28

0

ファイルを表示

    def on_left_clicked(self):
        """Display the current entry as a notification, then move to the next.

        The notification is only updated while ``notify.closed`` is True. The
        entry is then marked as read and the reader advances.
        """
        entry = reader.get_entry()
        title = entry['title']
        text = entry['summary']
        if text is None:
            # no summary available: use the entry content instead
            text = entry['content']
        updated = entry['updated']
        day = datetime.date.fromtimestamp(updated).isoformat()
        author = entry['author']

        if notify.closed is True:
            details = (author, day, strip_tags(text))
            notify.update(title,
                          '<b>%s</b> written at <i>%s</i>\n\n%s' % details)
            notify.attach_to_widget(self)

            # disabled "read more" icon experiment:
            #        read_more_button = gtk.Button()
            #        icon = read_more_button.render_icon(gtk.STOCK_DIALOG_INFO, gtk.ICON_SIZE_DIALOG)
            #        n.set_icon_from_pixbuf(icon)

            notify.closed = False
            notify.show()
            reader.set_read(entry)
            reader.iter_next()

コード例 #29

0

ファイルを表示

ファイル: blog_posts.py プロジェクト: elidupree/eliduprees-website-source

    story["pages"] = [post for post in story["pages"] if "don't deploy" not in post]
  for post_dict in story["pages"]:
    index = index + 1
    #post_dict["path_prefix"] = story["url"]+"/"
    post_dict["long_story_name"] = name
    post_dict["long_story_index"] = index
    if "listed" in story:
      post_dict["listed"] = True
    posts ["stories"].append(post_dict)
    
# Annotate every post with its category, URL path prefix and word count, then
# apply the optional content transformations.
for cat,post_list in posts.items():
  for post_dict in post_list:
    # posts that carry a "long_story_name" do not get a path prefix here
    if "long_story_name" not in post_dict:
      post_dict["path_prefix"] = "/" if cat=="" else "/"+cat+"/"
    post_dict["category"] = cat
    # counted on the tag-stripped, entity-unescaped text, before auto
    # paragraphs and the blog signature are applied below
    post_dict["word_count"] = utils.word_count (html.unescape (utils.strip_tags (post_dict ["contents"])))
    if "auto_paragraphs" in post_dict:
      post_dict ["contents"] = utils.auto_paragraphs (post_dict ["contents"])
    # only posts in the 'blog' category get the signature appended
    if cat == 'blog':
      post_dict['contents'] += signature
      
# A long story's word count is the sum of the word counts of its pages.
for name,story in long_stories.items():
  story["word_count"] = 0
  for post_dict in story["pages"]:
    story["word_count"] = story["word_count"] + post_dict["word_count"]


css.insert ('''
a.small_story {display: block; padding: 0.8em 0; color: black; text-decoration: none;}
a.small_story h2 {font-weight: bold; color: black;}
a.small_story .blurb {font-size:71%;}

コード例 #30

0

ファイルを表示

ファイル: lanit.py プロジェクト: mikeful/lanit-at-event-listing

def handle_add_event():
    """
    Handle a submission of the add-event form.

    Every outcome, whether the submission is rejected or the event is added,
    flashes a message and redirects to the event list view.
    """

    def respond(message):
        # All exits share one shape: flash the message, then go back to the list.
        flash(message)
        return redirect(url_for('handle_list_events'))

    # TODO: time-based throttle against spamming event additions
    if request.method == 'GET':
        return respond(u'Käytä lomaketta')

    # Pull the submitted fields; all but the URL are passed through strip_tags.
    short_name = strip_tags(request.form['short_name'].strip())
    name = strip_tags(request.form['name'].strip())
    url = request.form['url'].strip().lower()
    start_time = strip_tags(request.form['start_time'].strip())
    end_time = strip_tags(request.form['end_time'].strip())
    session_secret = strip_tags(request.form['session_secret'].strip())

    # Configure the Are You A Human check
    ayah.configure(app.config['ARE_YOU_HUMAN_PUBLISHER_KEY'], app.config['ARE_YOU_HUMAN_SCORING_KEY'])

    # The short name must pass validation
    if is_valid_short_name(short_name) != True:
        return respond(u'Tapahtuman lyhytnimi ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')

    # Parse both times first, then reject whichever one compares equal to False
    start_time = parse_date(start_time)
    end_time = parse_date(end_time)

    if start_time == False:
        return respond(u'Tapahtuman alkuaika ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')

    if end_time == False:
        return respond(u'Tapahtuman päättymisaika ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')

    # The event has to end strictly after it starts
    if start_time >= end_time:
        return respond(u'Tapahtuma ei voi päättyä ennen alkamistaan. Tarkista tiedot ja yritä uudelleen.')

    # The short name must not already be taken
    if event_exists(short_name):
        return respond(u'Tapahtuman lyhytnimi on jo käytössä. Valitse toinen nimi tai poista olemassa oleva tapahtuma.')

    # Ask the Are You A Human scoring service about this session
    if ayah.score_result(session_secret) == False:
        return respond(u'Spämmitarkistus epäonnistui. Tarkista että et ole spämmirobotti ja/tai yritä uudelleen.')

    # Validate the URL; the validated value replaces the submitted one
    url = validate_url(url)
    if url == False:
        return respond(u'Tapahtuman URL-osoite ei ole kelvollinen. Tarkista tiedot ja yritä uudelleen.')

    # TODO: spam checks on short name, name and URL; scoring by domain/word list.

    # Store the event
    stored = add_event(short_name=short_name, name=name, url=url, start_time=start_time,
                       end_time=end_time)
    if stored == False:
        return respond(
            u'Tapahtuman lisäämisessä tapahtui mystinen virhe. Tarkista tiedot ja yritä uudelleen. Ongelman jatkuessa ota yhteyttä.')

    # Success: show the event's secret key to the user
    secret_key = get_event_secret_key(short_name)
    return respond(
        u'Tapahtuma lisättiin onnistuneesti. Tapahtuman salainen avain on %s. Pidä se tallessa esimerkiksi poistoa varten.' % secret_key)

コード例 #31

0

ファイルを表示

def post_html(contents, title, permalink, taglist, stream_only, metadata, scrutinize = True, allow_comments = True, Patreon_type = "blog"):
  """Render one post to HTML.

  contents is run through blog_server_shared.postprocess_post_string, then the
  custom markers in the result are expanded: <content_warning_header ...>,
  <content_warning_p ...>, <transcript ...>, <cut> and <bigbreak>.

  Parameters:
    contents: raw post source handed to postprocess_post_string.
    title: post title; when truthy it supplies the wrapper's id attribute and
      a linked <h1> at the start of the first section.
    permalink: URL used for the title link, the "Continue reading" link and
      for rewriting fragment-only hrefs in stream mode.
    taglist, allow_comments, Patreon_type: passed through unchanged to
      metadata_and_comments_section_html.
    stream_only: when True, the post is truncated at <cut> and no readability
      line is added; otherwise <cut> markers are simply removed.
    metadata: mapping that must provide "id"; also passed through to
      metadata_and_comments_section_html.
    scrutinize: passed through to postprocess_post_string.

  Returns:
    A 2-tuple (post_html_string, head_string), where head_string is the
    concatenation of the <script>/<style> snippets collected along the way.
  """
  head = []
  # Only the first element of the postprocessing result is used here.
  post_content = blog_server_shared.postprocess_post_string(contents, metadata["id"], title, False, scrutinize)[0]
  
  # Snapshot used below to detect whether any content-warning markup was expanded.
  before_content_warnings = post_content
  
  # Expand <content_warning_header ...> into the story-level warning box.
  # NOTE(review): the tag's payload syntax comes from grouped_string_regex,
  # which is defined elsewhere -- confirm its exact form there.
  content_warning_header_regex = re.compile(r"<content_warning_header"+blog_server_shared.grouped_string_regex("content_warning_header_contents")+">", re.DOTALL)
  post_content = content_warning_header_regex.sub(lambda match: ('''

<div class="story_content_warning_header">
  <p>This story contains:</p>
  '''+hidden_cw_box('''
  <ul>
    '''+match.group("content_warning_header_contents")+'''
  </ul>
  <p>Notices will also appear in-context in the story, just before the material appears.</p>
  <p>If you see other material that should be marked (such as common triggers or phobias), '''+exmxaxixl.a('e-mail me')+'''. I am serious about web accessibility, and I will respond to your concerns as soon as I can manage.</p>
  ''')+'''
</div>'''), post_content)

  # Expand each in-context <content_warning_p ...> into a secondary warning box.
  content_warning_p_regex = re.compile(r"<content_warning_p"+blog_server_shared.grouped_string_regex("content_warning_p_contents")+">", re.DOTALL)
  post_content = content_warning_p_regex.sub(lambda match: secondary_hidden_cw_box('This section depicts '+match.group("content_warning_p_contents")+'.'), post_content)
  
  # The content-warning script is only added when one of the two substitutions
  # above actually changed the post.
  if post_content != before_content_warnings:
    head.append ("<script>window.elidupree.handle_content_warnings('"+ metadata ["id"]+"', false)</script>" )

  # Replace <transcript ...> markers one at a time. Each gets an identifier of
  # the form "<running number>_<post id>", a show/hide block in the body, and a
  # matching <style>/<script> snippet in head.
  next_transcript_number = 1
  while True:
    transcript_generator = re.search(r"<transcript"+ blog_server_shared.grouped_string_regex("transcript_text")+">", post_content, re.DOTALL)
    if transcript_generator is None:
      break
    transcript_identifier_string = str(next_transcript_number)+'_'+ metadata ["id"]
    # Splice the rendered block in place of the matched marker; the loop then
    # searches the updated string again for the next marker.
    post_content = post_content [0: transcript_generator.start(0)]+'<div id="transcript_'+ transcript_identifier_string+'" class="transcript_block"><div class="transcript_header">Transcript: <a id="show_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(show)</a><a id="hide_transcript_button_'+ transcript_identifier_string+'" href="javascript:;">(hide)</a></div><div class="transcript_content id'+ transcript_identifier_string+'">'+ transcript_generator.group("transcript_text")+'</div></div>' + post_content [transcript_generator.end(0):]
    head.append('''<style> 
html.transcript_hidden_'''+ transcript_identifier_string +''' div.transcript_content.id'''+ transcript_identifier_string +''' {display: none;}
#show_transcript_button_'''+ transcript_identifier_string +''' {display: none;}
html.transcript_hidden_'''+ transcript_identifier_string +''' #show_transcript_button_'''+ transcript_identifier_string +''' {display: inline;}
html.transcript_hidden_'''+ transcript_identifier_string +''' #hide_transcript_button_'''+ transcript_identifier_string +''' {display: none;}
    </style> 
    <script>
    window.elidupree.handle_transcript ("'''+ transcript_identifier_string +'''");
    </script>''')
    next_transcript_number = next_transcript_number + 1

  if stream_only == True:
    # With DOTALL the pattern runs from the first <cut> to the end of the
    # string, so everything after the cut is replaced by "[...]" and a
    # "Continue reading" link to the permalink.
    cutter = re. compile ( r"<cut>.*?</p>.*$", re.DOTALL)
    post_content = cutter.sub ('''[...]</p>
<a class="continue_reading" href="'''+ permalink +'''">Continue reading<span class="invisible"> '''+ title +'''</span>...</a>''', post_content)
    #this sometimes cuts off anchors, so make sure fragments point at the canonical URL
    post_content = re.sub ('href="#','href="' + permalink + '#', post_content)
  else:
    # Full view: the <cut> marker itself is just dropped.
    post_content = re.sub ("<cut>", "", post_content)
  
  # Readability is only computed for the full (non-stream) rendering.
  calculate_readability = (stream_only != True)
  if calculate_readability:
    #using the automated readability index
    # The measurements are taken on the tag-stripped, entity-unescaped text
    # with whitespace runs collapsed to single spaces.
    reference = re.sub(r"\s+", " ", html.unescape (utils.strip_tags (post_content)))
    # A "sentence" is counted as three word characters followed by the shortest
    # run up to the next '.', '?' or '!'.
    sentences = len(re.findall (r"\w\w\w.*?[.?!]", reference))
    words = utils.word_count (reference)
    characters = len(re.findall (r"\w", reference))
    # Guard against division by zero for posts with no words or sentences.
    if words >0 and sentences >0:
      readability = 4.71*characters/words +0.5 *words/sentences -21.43
      # The result is prepended to the post inside an <em class="debug"> element.
      post_content = '<em class="debug"> Approximate readability: ' + "{:.2f}".format (readability) + " ("+ str (characters) + " characters, " + str (words) +  " words, " + str (sentences)  + " sentences, " + "{:.2f}".format (characters/words) + " characters per word, " + "{:.2f}".format (words/sentences) + " words per sentence)</em>" + post_content
  
  # <bigbreak> splits the post into sections, each wrapped in its own div below.
  post_content_sections = post_content.split("<bigbreak>")
  id_str = ''
  if title:
    id_str = 'id="'+utils.format_for_url(title)+'"'
    # The '' right after href=" is an empty string literal joined by implicit
    # concatenation; it contributes nothing to the output.
    post_content_sections[0] = '<h1><a class="post_title_link" href="'''+permalink+'">'+title+'</a></h1>'+post_content_sections[0]
  for i in range(0, len(post_content_sections)):
    post_content_sections[i] = '<div class="post_content_section">'+post_content_sections[i]+'</div>'
  # First tuple element: the wrapped post followed by the metadata/comments
  # section. Second element: all collected head snippets joined together.
  return ('''
<div '''+id_str+''' class="blog_post">
  '''+(''.join(post_content_sections))+'''
</div>'''+metadata_and_comments_section_html(title, permalink, taglist, stream_only, metadata, allow_comments = allow_comments, Patreon_type = Patreon_type), "".join (head))

コード例 #32

0

ファイルを表示

 def short_description(self):
     """Return the summary when it is set, else the body with tags stripped."""
     if not self.summary:
         # No usable summary: fall back to the tag-free body text.
         return strip_tags(self.body)
     return self.summary

コード例 #33

0

ファイルを表示

ファイル: validator.py プロジェクト: torrents-com/content

                rs[domain['_id']] = False
                print "%s no es valido(1)" % domain['_id']

            else:
                test = test['test']
                #actualiza el lastSeenDate
                col_domain.update({"_id": url}, {"$set": {"test.ls": now}})

                #Se deja de comprobar si alguna validación ya ha dado negativo
                if not domain['_id'] in rs or rs[domain['_id']]:
                    #Deben coincidir los metadatos
                    for key, val in test['md'].items():
                        if key == "description":
                            #La descripcion la deja pasar
                            continue
                        if not key in data[url] or strip_tags(
                                data[url][key].lower()) != strip_tags(
                                    val.lower()):
                            if changes_allowed:
                                if not key in data[url]:
                                    print "eliminando %s de la url %s" % (key,
                                                                          url)
                                    col_domain.update(
                                        {"_id": url},
                                        {"$unset": {
                                            "test.md." + key: ""
                                        }})
                                else:
                                    #save new data
                                    print "salvando como nuevo data %s con el valor %s en la url %s" % (
                                        key, strip_tags(data[url][key]), url)
                                    col_domain.update({"_id": url}, {

コード例 #34

0

ファイルを表示

def mapQuestionsToResponses(responseData, questionData, surveyId):
    """Build one cleaned, flattened record per fully completed response.

    Args:
        responseData: iterable of raw response dicts. Each is read for
            'responseId', 'values' (progress, dates, location, language)
            and 'labels' (answer labels keyed by question id such as
            "QID3", "QID8_2" or "QID8#1_2").
        questionData: dict keyed by base question id (e.g. "QID8"). Each
            entry is read for 'QuestionType' and 'QuestionText', and for
            multi-part types also 'QuestionChoices'.
        surveyId: identifier stored unchanged on every cleaned response.

    Returns:
        A list of cleaned response dicts. A response is included only when
        its progress is 100, all expected attributes could be derived
        (including worker age, gender, factory and survey number, which
        are taken from the answers), and its survey number is not one of
        the known test numbers.

    Raises:
        KeyError: if a response has no 'labels', or an answer refers to a
            question that is missing from questionData.
    """
    print("STARTING QUESTION TO RESPONSE MAPPING PROCESS...\n")

    # Display names for the question type codes. Built once here rather than
    # once per answer.
    questionTypes = {
        "MC": "Multiple Choice",
        "TE": "Text Entry",
        "DB": "Descriptive Text or Graphics",
        "RO": "Ranked Order",
        "Slider": "Slider Question",
        "SBS": "Side by Side",
        "Matrix": "Matrix Table"
    }
    singleAnswerTypes = ("MC", "TC", "TE", "DB")
    subQuestionTypes = ("RO", "Slider", "Matrix")

    # Survey numbers used for test submissions; these are never returned.
    testSurveyNumbers = {
        "00000", "11111", "12345", "99999", "10000", "1000", "5408"
    }

    # Every one of these must be present for a response to be kept.
    requiredAttributes = ('surveyNumber', 'workerFactory', 'workerAge',
                          'answers', 'responseId', 'startDate', 'endDate',
                          'recordedDate', 'locationLongitude',
                          'locationLatitude', 'userLanguage',
                          'originalResponseString', 'surveyId',
                          'workerGender')

    # Step 1: Create new array for all cleansed answers
    cleanResponses = []

    # Step 2: Consolidate multi-tier question format in response data
    for response in responseData:
        # Only fully completed responses are considered.
        if float(response['values']['progress']) != 100:
            continue

        try:
            values = response['values']
            cleanResponse = {
                'responseId': response['responseId'],
                'startDate': values['startDate'],
                'endDate': values['endDate'],
                'recordedDate': values['recordedDate'],
                'locationLongitude': values['locationLongitude'],
                'locationLatitude': values['locationLatitude'],
                'userLanguage': values['userLanguage'],
                'originalResponseString': str(response),
                'surveyId': surveyId,
            }
        except (KeyError, TypeError):
            print(
                "Error: An exception occurred: One of the response keys is missing or mispelled in the base code. The response is the following: \n"
            )
            print(response)
            # A response missing any base attribute can never satisfy the
            # completeness check below, so skip it right away.
            continue

        # we need to read all question attributes in labels
        # segregate answers by question title
        # specifically get surveyNumber, Submitter Gender, Submitter Age
        responseLabelData = response['labels']
        answers = []
        for qid, label in responseLabelData.items():
            # only focus on the keys that are QID answers
            if "QID" not in qid:
                continue

            # assign varying indices needed
            questionNumber = qid.split("ID")[1]
            subQuestionNumber = "none"

            print("PROCESSING ANSWER: " + qid + "\n")
            print("QUESTION NUMBER NOW: " + questionNumber + "\n")
            print("SUB QUESTION NUMBER NOW: " + subQuestionNumber + "\n")

            # account for varying question types:
            # "<number>_<sub>" carries a sub-question index after "_"
            if "_" in qid:
                subQuestionNumber = questionNumber.split("_")[1]
                questionNumber = questionNumber.split("_")[0]
            baseQuestionID = "QID" + questionNumber

            if "#" in qid:
                # the base id is everything before the "#"
                baseQuestionID = qid.split("#")[0]

            # communicate current values
            print("QUESTION NUMBER NOW: " + questionNumber + "\n")
            print("SUB QUESTION NUMBER NOW: " + subQuestionNumber + "\n")
            print("BASE QUESTION ID: " + baseQuestionID + "\n")

            question = questionData[baseQuestionID]
            questionType = question['QuestionType']
            # "TC" is accepted below but has no display name; fall back to the
            # raw type code instead of failing with a KeyError.
            questionTypeName = questionTypes.get(questionType, questionType)

            if questionType in singleAnswerTypes:
                answerLabel = label
                if "NPS_GROUP" in qid:
                    # Replace the NPS group name with its score range.
                    if label == "Promoter":
                        answerLabel = "9 or 10"
                    elif label == "Passive":
                        answerLabel = "7 or 8"
                    else:
                        answerLabel = "0-6"
                answers.append({
                    'QuestionText': question['QuestionText'],
                    "QuestionType": questionTypeName,
                    "QuestionAnswer": answerLabel,
                    "QuestionId": qid
                })
            elif questionType in subQuestionTypes:
                matrixQuestions = question['QuestionChoices']
                # Answers without an integer sub-question index are ignored.
                if is_int(subQuestionNumber):
                    # find which of the questions is being answered
                    answers.append({
                        'QuestionText':
                        question['QuestionText'] + " (" + strip_tags(
                            matrixQuestions[subQuestionNumber]['Display']) +
                        ")",
                        "QuestionType": questionTypeName,
                        "QuestionAnswer": label,
                        "QuestionId": qid
                    })
            elif questionType == "SBS":
                # EX: qid = QID8#1_1 -> subQuestionNumber = 1,
                #     questionNumber = 8#1, baseQuestionID = QID8

                # the part after "#" identifies the question section
                questionSection = questionNumber.split("#")[1]

                # collect statements used across question sections
                matrixQuestions = question['QuestionChoices']

                answers.append({
                    'QuestionText':
                    question['QuestionText'] + " (" + strip_tags(
                        matrixQuestions[subQuestionNumber]['Display']) +
                    ": Question Section " + questionSection + ")",
                    "QuestionType": "Side by Side",
                    "QuestionAnswer": label,
                    "QuestionId": qid
                })

        # set answer to cleanResponse
        cleanResponse['answers'] = answers

        # set respondent age, gender, factory and survey number by matching
        # well-known phrases in the question text
        surveyNumber = ""
        for answer in answers:
            questionText = answer["QuestionText"].lower()
            if "how old are you" in questionText:
                try:
                    age = int(answer["QuestionAnswer"])
                    cleanResponse['workerAge'] = makeAgeBin(age)
                except ValueError:
                    # Not a plain number: keep the answer as given.
                    cleanResponse['workerAge'] = answer["QuestionAnswer"]
            if "what is your gender" in questionText:
                cleanResponse['workerGender'] = answer["QuestionAnswer"]
            if "survey number" in questionText:
                # Several matching answers are concatenated.
                surveyNumber += str(answer["QuestionAnswer"])
                print(surveyNumber)
            if "which factory do you work at" in questionText:
                cleanResponse['workerFactory'] = answer["QuestionAnswer"]

        cleanResponse['surveyNumber'] = surveyNumber

        # final cleanse of cleanResponse then append clean response
        if all(key in cleanResponse for key in requiredAttributes):
            print("CHECKING IF TEST RESPONSE..\n")
            # A membership test is required here: a chain of `!=` comparisons
            # joined with `or` is true for every value and would let test
            # responses through.
            if cleanResponse['surveyNumber'] not in testSurveyNumbers:
                print("NOT TEST RESPONSE. ADDING TO LIST!..\n")
                cleanResponses.append(cleanResponse)

    # Step 3: return clean responses
    print("COMPLETED ANSWER TO QUESTION MATCHING!...\n")
    return cleanResponses

コード例 #35

0

ファイルを表示

ファイル: snoc.py プロジェクト: Web5design/twittermap

 def receive_tweet(self, tweet, word=None):
     """Feed a tweet's text to ccipca_iter, subject to the configured filters.

     The text is the prefixed screen name, a space, and the tweet text with
     tags stripped. With no filters set every tweet is passed on; otherwise
     only tweets whose lowercased text contains at least one filter are.
     """
     author = '******' + tweet['user']['screen_name']
     text = author + ' ' + utils.strip_tags(tweet['text'])
     if self.filters:
         lowered = text.lower()
         if not any(filt in lowered for filt in self.filters):
             # Filters are set and none of them matched: drop the tweet.
             return
     self.ccipca_iter(text, text, extras=word)

コード例 #36

0

ファイルを表示

ファイル: snoc.py プロジェクト: imclab/twittermap

 def receive_tweet(self, tweet, word=None):
     """Pass a tweet on to ccipca_iter unless the filters reject it.

     The processed text is the prefixed screen name followed by a space and
     the tag-stripped tweet text. A tweet is accepted when no filters are
     configured, or when any filter occurs in the lowercased text.
     """
     sender = '******' + tweet['user']['screen_name']
     text = sender + ' ' + utils.strip_tags(tweet['text'])
     accepted = not self.filters
     if not accepted:
         haystack = text.lower()
         accepted = any(filt in haystack for filt in self.filters)
     if accepted:
         self.ccipca_iter(text, text, extras=word)

コード例 #37

0

ファイルを表示

ファイル: gui_helpers.py プロジェクト: BonsaiDen/Atarashii

 def notifcation(self, typ, msg):
     """Queue a desktop notification for msg when the 'notify' setting is on.

     typ selects the icon: MESSAGE_ERROR maps to 'dialog-error', anything
     else to 'dialog-warning'. The message is added with its tags stripped.
     """
     if not self.settings.is_true('notify'):
         return
     if typ == MESSAGE_ERROR:
         icon = 'dialog-error'
     else:
         icon = 'dialog-warning'
     entry = ('Atarashii', strip_tags(msg), icon, 'theme:' + icon)
     self.main.notifier.add(entry)

コード例 #38

0

ファイルを表示

def title_formatted_title(post_dict):
  """Return the post's "title" entry with tags stripped."""
  strip = utils.strip_tags
  return strip(post_dict["title"])