コード例 #1
0
def parse_transcript(path):
    """
    Parse a Premiere .tsv file, calculate total seconds and write to JSON for use in the app.

    ``path`` is opened as UTF-16 text and read as tab-separated rows. The
    first row is treated as a header and skipped. Each remaining row yields
    one ``{'time': ..., 'transcript': ...}`` entry; timecodes have the form
    ``H;M;S;F`` and the frame count is converted at 24 frames per second.
    The result is written to ``www/data/<name>.json`` where ``<name>`` is
    the input file name without its extension.
    """
    import os

    data = {
        'subtitles': []
    }
    # splitext copes with names that contain several dots (or none); the
    # former split('.') unpacking raised ValueError for those.
    filename = os.path.splitext(os.path.basename(path))[0]

    with codecs.open(path, 'rb', encoding='utf16') as f:
        transcript = f.read().encode('utf-8')
        tab_reader = csv.reader(BytesIO(transcript), delimiter='\t')
        # Skip the header row; the default keeps an empty file from raising.
        next(tab_reader, None)
        for row in tab_reader:
            if not row:
                # Blank line in the export: nothing to parse.
                continue

            # Premiere exports kind of suck: some rows carry an empty
            # leading column, which shifts the text and timecode by one.
            offset = 1 if row[0] == '' else 0
            words = smartypants(row[offset].strip())
            time_str = row[offset + 1]

            hours, minutes, seconds, frame = [int(x) for x in time_str.split(';')]

            decimal = float(frame) / 24
            total_seconds = (hours * 3600) + (minutes * 60) + (seconds + decimal)

            data['subtitles'].append({
                'time': total_seconds,
                'transcript': words,
            })

    with open('www/data/%s.json' % filename, 'w') as wf:
        wf.write(json.dumps(data))
コード例 #2
0
ファイル: updater.py プロジェクト: robertozoia/ristretto
    def publish_index_pages(self):
        """Render all posts and write the paginated index pages.

        Every post returned by ``get_all_file_posts_by_date`` is converted
        from Markdown to HTML and passed through smartypants, then the posts
        are written ``POSTS_PER_PAGE`` at a time. The first page is named
        ``INDEX_PAGE``; later pages are ``<index stem>-<n><HTML_EXT>``. Each
        page receives the URLs of its neighbours, or ``None`` at either end.
        """
        # The post list is fetched once; the original fetched it twice and
        # discarded the first result.
        posts = [Post(f, self.s) for f in self.get_all_file_posts_by_date()]

        for post in posts:
            post.content = smartypants(self.md_to_html(post.content))
            post.title = smartypants(post.title)

        per_page = self.s.POSTS_PER_PAGE
        index_page = self.s.INDEX_PAGE
        index_stem = os.path.splitext(index_page)[0]
        dest_dir = self.s.WWW_DIR

        def page_name(number):
            # Single source of truth for page names, so the previous/next
            # links always match the files written (the previous-page link
            # used to hard-code ".html" instead of HTML_EXT).
            if number == 0:
                return index_page
            return "%s-%d%s" % (index_stem, number, self.s.HTML_EXT)

        first_post = 0
        page_number = 0

        while first_post < len(posts):
            last_post = first_post + per_page
            local_fname = page_name(page_number)

            # Pagination
            if len(posts) <= last_post:
                # No next page
                next_page_url = None
            else:
                next_page_url = page_name(page_number + 1)

            if first_post - per_page < 0:
                # No previous page
                prev_page_url = None
            else:
                prev_page_url = page_name(page_number - 1)

            self.write_posts_to_file(
                posts=posts[first_post:last_post],
                fname=local_fname,
                dir=dest_dir,
                template=self.s.INDEX_TEMPLATE,
                prev_page_url=prev_page_url,
                next_page_url=next_page_url,
            )

            logging.info("Wrote posts %d-%d to %s." % (first_post, last_post, local_fname))

            first_post = last_post
            page_number += 1
コード例 #3
0
ファイル: updater.py プロジェクト: robertozoia/ristretto
    def prepare_post(self, post):
        """Apply typographic clean-up to a post, in place.

        The title goes straight through smartypants. The content is first
        rendered from Markdown using the extensions, extension configs and
        output format from the settings object, then through smartypants.
        """
        post.title = smartypants(post.title)

        settings = self.s
        rendered = markdown.markdown(
            post.content,
            extensions=settings.MD_EXTENSIONS,
            extension_configs=settings.MD_EXTENSION_CONFIGS,
            output_format=settings.MD_OUTPUT_FORMAT,
        )
        post.content = smartypants(rendered)
コード例 #4
0
ファイル: blog.py プロジェクト: handroll/handroll
 def __init__(self, **kwargs):
     """Build the post from keyword arguments.

     Required keys: ``date``, ``source_file``, ``summary``, ``title``,
     ``route``, ``url`` and ``posts``. The summary and title are stored
     after being passed through smartypants.
     """
     for plain_field in ('date', 'source_file'):
         setattr(self, plain_field, kwargs[plain_field])

     raw_summary = kwargs['summary']
     self.summary = smartypants.smartypants(raw_summary)
     raw_title = kwargs['title']
     self.title = smartypants.smartypants(raw_title)

     for plain_field in ('route', 'url'):
         setattr(self, plain_field, kwargs[plain_field])

     # Having the posts enables a blog post to find its relationships.
     self._posts = kwargs['posts']
コード例 #5
0
ファイル: blog.py プロジェクト: iter8ve/handroll
 def __init__(self, **kwargs):
     """Initialise the post from its keyword arguments.

     Expects ``date``, ``source_file``, ``summary``, ``title``, ``route``,
     ``url`` and ``posts``. ``summary`` and ``title`` are run through
     smartypants before being stored.
     """
     self.date = kwargs["date"]
     self.source_file = kwargs["source_file"]

     summary_text = kwargs["summary"]
     title_text = kwargs["title"]
     self.summary = smartypants.smartypants(summary_text)
     self.title = smartypants.smartypants(title_text)

     self.route = kwargs["route"]
     self.url = kwargs["url"]

     # The full post collection lets a post look up its relationships.
     self._posts = kwargs["posts"]
コード例 #6
0
 def __init__(self, **kwargs):
     """Store the post's fields taken from ``kwargs``.

     ``date``, ``source_file``, ``route`` and ``url`` are kept as given;
     ``summary`` and ``title`` are smartened with smartypants first; the
     ``posts`` collection is kept privately.
     """
     options = kwargs

     self.date = options['date']
     self.source_file = options['source_file']
     smart_summary = smartypants.smartypants(options['summary'])
     self.summary = smart_summary
     smart_title = smartypants.smartypants(options['title'])
     self.title = smart_title
     self.route = options['route']
     self.url = options['url']

     # Keeping a handle on all posts lets this one find related posts.
     self._posts = options['posts']
コード例 #7
0
 def smartquotes(self, text: str) -> str:
     """If enabled, apply 'smart quotes' to the text; replaces quotes and dashes by nicer looking symbols.

     The text is returned unchanged unless both ``supports_smartquotes`` and
     ``do_smartquotes`` are truthy. Otherwise it is converted with the
     smartypants ``q``, ``B``, ``D`` and ``e`` attributes, plus ``u`` when
     the installed smartypants offers it.
     """
     if not (self.supports_smartquotes and self.do_smartquotes):
         return text

     attrs = (smartypants.Attr.q | smartypants.Attr.B |
              smartypants.Attr.D | smartypants.Attr.e)
     if hasattr(smartypants.Attr, "u"):
         return smartypants.smartypants(text, attrs | smartypants.Attr.u)

     # older smartypants lack attribute 'u' for avoiding html entity creation,
     # so the entities are decoded afterwards. html.unescape is the documented
     # API; html.parser.unescape was only an incidental re-export.
     import html
     return html.unescape(smartypants.smartypants(text, attrs))
コード例 #8
0
 def make_item_content_encoded(self, text1, text2, url, comment_name):
     """
     Build the encoded content for a feed item; called from
     item_content_encoded() in children.

     text1 and text2 are chunks of HTML text (or empty strings); both are
     passed through smartypants.
     url is the URL of the item (no domain needed, eg '/diary/1666/10/31/');
     the current site's domain is added to it.
     comment_name is one of 'comment' or 'annotation'; it is used for both
     the link fragment and the link text.
     """
     first_chunk = force_unicode(smartypants.smartypants(text1))
     second_chunk = force_unicode(smartypants.smartypants(text2))
     absolute_url = add_domain(Site.objects.get_current().domain, url)

     template = '%s %s <p><strong><a href="%s#%ss">Read the %ss</a></strong></p>'
     return template % (
         first_chunk,
         second_chunk,
         absolute_url,
         comment_name,
         comment_name,
     )
コード例 #9
0
def render_gist(request, id, raw):
    """Render a raw gist and store it.

    Builds a summary record from the API payload ``raw`` and renders every
    file whose language appears in ``RENDERABLE``: Markdown through GitHub's
    markdown endpoint, reStructuredText through ``render_rst``. The HTML is
    passed through smartypants, the record is upserted into the ``gists``
    table via ``request.rdbconn`` and then returned.

    ``id`` is only used in the warning logged when a Markdown render fails;
    such files are left out of the stored record.
    """
    gist = {
        'id': raw['id'],
        'html_url': raw['html_url'],
        'public': raw['public'],
        'description': raw['description'],
        'created_at': iso8601.parse_date(raw['created_at']),
        'updated_at': iso8601.parse_date(raw['updated_at']),
        'fetched_at': rdb.now(),
        'author_id': raw['user']['id'],
        'author_login': raw['user']['login'],
        'files': [],
    }

    for gistfile in raw['files'].values():
        # "fmt" rather than "format", which would shadow the builtin.
        fmt = RENDERABLE.get(gistfile['language'])

        if fmt is None:
            continue

        output = None

        if fmt is FORMAT_MD:
            payload = {
                'mode': 'gfm',
                'text': gistfile['content'],
            }
            req_render = requests.post('https://api.github.com/markdown',
                                       params=GITHUB_AUTH_PARAMS,
                                       data=unicode(json.dumps(payload)))
            if req_render.status_code != 200:
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning('Render {} file {} failed: {}'.format(
                    id, gistfile['filename'], req_render.status_code))
                continue
            output = smartypants.smartypants(req_render.text)

        if fmt is FORMAT_RST:
            rendered = render_rst(gistfile['content'],
                                  writer_name='html')['fragment']
            output = smartypants.smartypants(rendered)

        if output is not None:
            # The file dict from ``raw`` is annotated in place and reused.
            gistfile['rendered'] = output
            gist['files'].append(gistfile)

    rdb.table('gists').insert(gist, upsert=True).run(request.rdbconn)
    return gist
コード例 #10
0
ファイル: views.py プロジェクト: OWLOOKIT/gistio
def render_gist(request, id, raw):
    """Render a raw gist and store it.

    A record is assembled from the API payload ``raw``. Each file whose
    language maps to a format in ``RENDERABLE`` is rendered to HTML
    (Markdown via GitHub's markdown endpoint, reStructuredText via
    ``render_rst``) and passed through smartypants. The record is upserted
    into the ``gists`` table using ``request.rdbconn`` and returned.

    ``id`` appears only in the warning logged for a failed Markdown render;
    a file whose render fails is skipped.
    """
    author = raw['user']
    gist = {
        'id': raw['id'],
        'html_url': raw['html_url'],
        'public': raw['public'],
        'description': raw['description'],
        'created_at': iso8601.parse_date(raw['created_at']),
        'updated_at': iso8601.parse_date(raw['updated_at']),
        'fetched_at': rdb.now(),
        'author_id': author['id'],
        'author_login': author['login'],
        'files': [],
    }

    for gistfile in raw['files'].values():
        # Local renamed from "format" so the builtin is not shadowed.
        file_format = RENDERABLE.get(gistfile['language'])
        if file_format is None:
            continue

        output = None

        if file_format is FORMAT_MD:
            payload = {
                'mode': 'gfm',
                'text': gistfile['content'],
            }
            req_render = requests.post('https://api.github.com/markdown',
                                       params=GITHUB_AUTH_PARAMS,
                                       data=unicode(json.dumps(payload)))
            if req_render.status_code != 200:
                # Logger.warning replaces the deprecated Logger.warn alias.
                logger.warning('Render {} file {} failed: {}'.format(
                    id, gistfile['filename'], req_render.status_code))
                continue
            output = smartypants.smartypants(req_render.text)

        if file_format is FORMAT_RST:
            rendered = render_rst(gistfile['content'], writer_name='html')['fragment']
            output = smartypants.smartypants(rendered)

        if output is not None:
            # Note: this annotates the file dict that belongs to ``raw``.
            gistfile['rendered'] = output
            gist['files'].append(gistfile)

    rdb.table('gists').insert(gist, upsert=True).run(request.rdbconn)
    return gist
コード例 #11
0
ファイル: static.py プロジェクト: nprapps/play-quiz
 def convert_entities(obj):
     """Smarten and ASCII-encode every unicode string in a nested mapping, in place.

     Unicode values are passed through smartypants and then encoded to
     ASCII with non-ASCII characters turned into XML character references.
     Values that are themselves mappings are processed recursively. Any
     other value (number, None, list, byte string) is left untouched; the
     previous version recursed into those and raised AttributeError.
     """
     for key in obj.iterkeys():
         value = obj[key]
         if isinstance(value, unicode):
             smart = smartypants(value)
             obj[key] = smart.encode('ascii', 'xmlcharrefreplace')
         elif hasattr(value, 'iterkeys'):
             # Only recurse into things that can actually be iterated by key.
             convert_entities(value)
コード例 #12
0
ファイル: views.py プロジェクト: kirillyar/pony-forum
def prettify_title(title):
    """Prettify the quotes and dashes of a thread or category title and
    sanitize ALL HTML in it.

    The title is run through smartypants with attribute string "2", then
    cleaned by bleach with an empty tag list and no allowed attributes.
    """
    curled = smartypants(title, "2")
    return bleach.clean(curled, tags=[], attributes={})
コード例 #13
0
 def convert_entities(obj):
     """Convert the unicode strings of a nested mapping to smartened ASCII, in place.

     Each unicode value is run through smartypants and encoded to ASCII,
     with characters outside ASCII replaced by XML character references.
     Nested mappings are handled recursively. Values of any other kind
     (numbers, None, lists, byte strings) are skipped instead of being
     recursed into, which used to fail with AttributeError.
     """
     for key in obj.iterkeys():
         current = obj[key]
         if isinstance(current, unicode):
             obj[key] = smartypants(current).encode('ascii', 'xmlcharrefreplace')
             continue
         if hasattr(current, 'iterkeys'):
             # Mapping-like value: descend into it.
             convert_entities(current)
コード例 #14
0
ファイル: utils.py プロジェクト: newsdev/nyt-scotus
def webfix_unicode(possible_string):
    """
    Turn messy cut-and-paste from decision text into Times-approved HTML.

    Non-string input is returned unchanged. Strings are stripped, a fixed
    set of characters is replaced by entities or plain quotes, and the
    result is decoded as UTF-8, repaired with ftfy and passed through
    smartypants.
    """
    if not isinstance(possible_string, basestring):
        return possible_string

    # (character, replacement) pairs, applied in this order.
    substitutions = (
        (u'\xa7', u'&sect;'),
        (u'\u2014', u'&mdash;'),
        (u'\u2013', u'&ndash;'),
        (u'\x97', u'&mdash;'),
        (u'\xa4', u'&euro;'),
        (u'\u201c', u'"'),
        (u'\u201d', u'"'),
        (u'\x96', u'&#150;'),
    )

    cleaned = possible_string.strip()
    for target, replacement in substitutions:
        cleaned = cleaned.replace(target, replacement)

    cleaned = unicode(cleaned.decode('utf-8'))
    repaired = ftfy.fix_text(cleaned)
    return smartypants.smartypants(repaired)
コード例 #15
0
ファイル: base.py プロジェクト: lbarchive/b.py
    def generate(self, markup=None):
        """Generate HTML

        ``markup`` defaults to ``self.markup``. The markup is converted by
        ``_generate``; if the ``smartypants`` option is set the HTML is run
        through smartypants, and if the handler supports it and the
        ``embed_images`` option is set, images are embedded afterwards.

        When the ``smartypants`` option is set but the library is missing, a
        RuntimeWarning is issued and only that step is skipped; image
        embedding still runs (it used to be skipped as well).

        >>> class Handler(BaseHandler):
        ...   def _generate(self, markup=None): return markup
        >>> handler = Handler(None)
        >>> print(handler.generate('foo "bar"'))
        foo "bar"
        >>> handler.options['smartypants'] = True
        >>> print(handler.generate('foo "bar"'))
        foo &#8220;bar&#8221;
        """

        if markup is None:
            markup = self.markup

        html = self._generate(markup)

        if self.options.get('smartypants', False):
            if HAS_SMARTYPANTS:
                Attr = smartypants.Attr
                html = smartypants.smartypants(html, Attr.set1 | Attr.w)
            else:
                warnings.warn(
                    "smartypants option is set, "
                    "but the library isn't installed.", RuntimeWarning)

        if self.SUPPORT_EMBED_IMAGES and self.options.get(
                'embed_images', False):
            html = self.embed_images(html)

        return html
コード例 #16
0
ファイル: basenodehandler.py プロジェクト: xuyongmin/rst2pdf
 def apply_smartypants(self, text, smarty, node):
     """Run smartypants on ``text`` only for selected node types.

     The text is converted, with ``smarty`` as the smartypants attribute
     argument, when the node's class is exactly paragraph, block_quote or
     title; for any other node the text is returned unchanged.
     """
     # Try to be clever about when to use smartypants
     smartened_classes = (
         docutils.nodes.paragraph,
         docutils.nodes.block_quote,
         docutils.nodes.title,
     )
     if node.__class__ not in smartened_classes:
         return text
     return smartypants(text, smarty)
コード例 #17
0
ファイル: utils.py プロジェクト: newsdev/nyt-scotus
def webfix_unicode(possible_string):
    """
    This is ugly but it will create Times-approved HTML
    out of terrible cut-and-paste from decision text.

    Non-``str`` input is returned unchanged. A ``str`` is stripped, a fixed
    set of characters is replaced by entities or plain quotes, and the
    result is repaired with ftfy and passed through smartypants.
    """
    character_map = [
        ('\xa7', '&sect;'),
        ('\u2014', '&mdash;'),
        ('\u2013', '&ndash;'),
        ('\x97', '&mdash;'),
        ('\xa4', '&euro;'),
        ('\u201c', '"'),
        ('\u201d', '"'),
        ('\x96', '&#150;'),
    ]

    if not isinstance(possible_string, str):
        return possible_string

    string = possible_string.strip()
    for char, replace_char in character_map:
        string = string.replace(char, replace_char)
    # No decode step: a Python 3 str is already text and has no .decode(),
    # so the former string.decode('utf-8') raised AttributeError for every
    # string passed in.
    string = ftfy.fix_text(string)
    return smartypants.smartypants(string)
コード例 #18
0
def prettify_title(title):
    """Sanitizes all (ALL) HTML elements in titles while prettifying the quotes
    and dashes used in the titles of threads and categories.
    """
    return bleach.clean(\
                        smartypants(title, "2"),\
                        tags=[], attributes={})
コード例 #19
0
ファイル: xslt.py プロジェクト: decorator-factory/nedbatcom
def content_transform(name, xmltext, child=None, params={}):
    """Transform XML text to HTML with the module's XSLT and smarten its quotes.

    ``xmltext`` is parsed as UTF-8 XML; on a parse failure the text is
    printed and the exception re-raised. If ``child`` is given, only that
    sub-element is transformed. ``params`` is copied (the shared default is
    never mutated) and extended with a ``base`` parameter from the settings.
    Entries of the transform's error log are printed, using ``name`` in
    place of the '<string>' filename. Returns the resulting HTML string.
    """
    source = BytesIO(xmltext.encode('utf-8'))
    try:
        doc = etree.parse(source)
    except:
        # Deliberately catches everything: show the offending text, re-raise.
        print("Text was {!r}".format(xmltext))
        raise

    if child:
        doc = doc.find(child)

    xslt_params = dict(params)
    xslt_params['base'] = string_param(settings.BASE)
    html = str(xslt_xform(doc, **xslt_params))

    # smartypants doesn't handle </a>' properly, so an apostrophe directly
    # after a closing tag is converted by hand first.
    html = re.sub(r"(</\w+>)'", r"\1&#8217;", html)
    quote_attrs = smartypants.Attr.q | smartypants.Attr.n
    html = smartypants.smartypants(html, quote_attrs)

    for entry in xslt_xform.error_log:
        fname = name if entry.filename == '<string>' else entry.filename
        print("Message, %s @ %d: %s" % (fname, entry.line, entry.message))
    return html
コード例 #20
0
ファイル: scraper.py プロジェクト: olekang/daily-paper
 def prettify_text(self, text):
     """
     Polish text typographically: apply this object's widont() first, then
     SmartyPants, and return the result.
     """
     without_widows = self.widont(text)
     return smartypants.smartypants(without_widows)
コード例 #21
0
def smartypants(text):
    """Curl the quotes in ``text`` by delegating to the smartypants library.

    >>> smartypants('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    curled = _smartypants.smartypants(text)
    return curled
コード例 #22
0
ファイル: updater.py プロジェクト: robertozoia/ristretto
    def publish_pages(self, posts=None, force_publish=False):
        """Render the given pages to HTML files under the pages directory.

        With ``force_publish`` set, ``posts`` is ignored and rebuilt from
        every file in ``PAGES_DIR`` ending in ``MD_EXT``. Each post's
        content is converted from Markdown and smartened, its title is
        smartened, and it is written as ``<slug><HTML_EXT>`` using
        ``PAGES_TEMPLATE``. Nothing happens when there are no posts.
        """
        if force_publish:
            pages_dir = self.s.PAGES_DIR
            posts = [
                Post(os.path.join(pages_dir, f), self.s)
                for f in os.listdir(pages_dir)
                if f.endswith(self.s.MD_EXT)
            ]

        if not posts:
            # posts defaults to None; iterating it used to raise TypeError
            # whenever the method was called without posts or force_publish.
            return

        # The target directory is the same for every page.
        html_dir = os.path.join(self.s.WWW_DIR, self.s.WWW_PAGES_URL)

        for post in posts:
            post.content = smartypants(self.md_to_html(post.content))
            post.title = smartypants(post.title)

            html_fname = "%s%s" % (post.slug, self.s.HTML_EXT)
            html_full_path = os.path.join(html_dir, html_fname)

            tools.mkdirp(html_dir)
            # TODO: check dir owner/permission
            self.write_single_post_to_file(post=post, fname=html_full_path, template=self.s.PAGES_TEMPLATE)
コード例 #23
0
def _smartypants(text):
    """Apply smartypants and return literal characters instead of entities.

    smartypants runs with the ``b``, ``D`` and ``e`` attributes; the numeric
    entities for en dash, em dash, ellipsis and the two double quotes are
    then replaced by the corresponding characters.
    """
    attrs = smartypants.Attr.b | smartypants.Attr.D | smartypants.Attr.e
    converted = smartypants.smartypants(text, attrs)

    entity_to_char = (
        ("&#8211;", "–"),
        ("&#8212;", "—"),
        ("&#8230;", "…"),
        ("&#8220;", "“"),
        ("&#8221;", "”"),
    )
    for entity, char in entity_to_char:
        converted = converted.replace(entity, char)
    return converted
コード例 #24
0
def _to_smart(verse):
    """Return ``verse`` with typographic quotes as literal characters.

    Backticks are first rewritten as apostrophes (a comma directly followed
    by a backtick also gains a space), the text is passed through
    smartypants, and the resulting HTML entities are unescaped.
    """
    normalised = verse.replace(",`", ", '").replace("`", "'")
    with_entities = smartypants(normalised)
    return HTMLParser().unescape(with_entities)
コード例 #25
0
ファイル: models.py プロジェクト: groundupnews/gu
 def clean_typography(self, text):
     """Smarten ``text`` and tidy up spaces and quote entities.

     After smartypants, the replacements below are applied in order.
     NOTE(review): "&nbsp;" is removed outright rather than replaced by a
     space, which joins the words on either side -- confirm this is intended.
     """
     replacements = (
         ("&nbsp;", ""),
         ("  ", " "),
         (u'&#8217;', u'’'),
         (u'&#8220;', u'“'),
         (u'&#8221;', u'”'),
         (u'\xa0 ', u' '),
         (u' \xa0', u' '),
     )
     cleaned = smartypants.smartypants(text)
     for old, new in replacements:
         cleaned = cleaned.replace(old, new)
     return cleaned
コード例 #26
0
ファイル: plugins.py プロジェクト: MarkusH/blog
 def smartypants_wrapper(text):
     """Run smartypants, with the ``w`` attribute added to its defaults, on ``text``.

     Raises TypogrifyError when the smartypants library cannot be imported.
     If ``text`` is an AMPString, the result is an AMPString whose
     ``amp_data`` has been converted the same way.
     """
     try:
         import smartypants
     except ImportError:
         from typogrify.filters import TypogrifyError
         message = (
             "Error in {% smartypants %} filter: The Python smartypants "
             "library isn't installed."
         )
         raise TypogrifyError(message)

     attr = smartypants.default_smartypants_attr | smartypants.Attr.w
     content = smartypants.smartypants(text, attr=attr)
     if not isinstance(text, AMPString):
         return content

     # Convert the AMP variant too and carry it along on the result.
     amp_data = text.amp_data
     amp_content = AMPString(content)
     amp_content.amp_data = smartypants.smartypants(amp_data, attr=attr)
     return amp_content
コード例 #27
0
def smartypants_filter(text):
    """
    Return ``text`` run through smartypants, or an empty string when
    ``text`` is falsy.
    """
    return smartypants(text) if text else ''
コード例 #28
0
    def smartypants_wrapper(text):
        """Smarten ``text`` using smartypants' default attributes plus ``w``.

        A TypogrifyError is raised if the smartypants library is missing.
        For an AMPString input, an AMPString is returned and its
        ``amp_data`` is smartened with the same attributes.
        """
        try:
            import smartypants
        except ImportError:
            from typogrify.filters import TypogrifyError

            raise TypogrifyError(
                "Error in {% smartypants %} filter: The Python smartypants "
                "library isn't installed.")

        attr = smartypants.default_smartypants_attr | smartypants.Attr.w
        smartened = smartypants.smartypants(text, attr=attr)

        if isinstance(text, AMPString):
            original_amp = text.amp_data
            smartened = AMPString(smartened)
            smartened.amp_data = smartypants.smartypants(original_amp, attr=attr)
        return smartened
コード例 #29
0
 def apply_smartypants(self, text, smarty, node):
     """Apply smartypants to ``text`` for paragraph, block quote and title nodes.

     ``smarty`` is converted with ``_str_attr_to_int`` before being passed
     to smartypants. For any other node the text is returned unchanged.
     """
     # Try to be clever about when to use smartypants
     smartened_types = (
         docutils.nodes.paragraph,
         docutils.nodes.block_quote,
         docutils.nodes.title,
     )
     if not isinstance(node, smartened_types):
         return text
     return smartypants(text, _str_attr_to_int(smarty))
コード例 #30
0
def smartypants_filter(text):
    """
    Smarty pants filter: falsy input gives an empty string, anything else
    is passed through smartypants.
    """
    if not text:
        return ''
    return smartypants(text)
コード例 #31
0
def smartypants(text):
    """Return ``text`` with its quotes curled by the smartypants library.

    >>> smartypants('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    convert = _smartypants.smartypants
    return convert(text)
コード例 #32
0
def _to_smart(verse):
    """Smarten the quotes of ``verse`` and return plain characters, not entities.

    A comma directly followed by a backtick becomes comma, space and
    apostrophe; remaining backticks become apostrophes. The text then goes
    through smartypants and the entities it produces are unescaped.
    """
    for old, new in ((",`", ", '"), ("`", "'")):
        verse = verse.replace(old, new)

    smartened = smartypants(verse)
    return HTMLParser().unescape(smartened)
コード例 #33
0
def clean_typography(text):
    """Smarten ``text`` and normalise its spacing and quote entities.

    After smartypants, the following replacements are applied in order:
    "&nbsp;" becomes a space, double spaces collapse to one, the entities
    for the right single quote and both double quotes become literal
    characters, and a non-breaking space next to a normal space is dropped.
    """
    result = smartypants.smartypants(text)
    result = result.replace("&nbsp;", " ")
    result = result.replace("  ", " ")
    for entity, char in ((u'&#8217;', u'’'), (u'&#8220;', u'“'), (u'&#8221;', u'”')):
        result = result.replace(entity, char)
    result = result.replace(u'\xa0 ', u' ')
    return result.replace(u' \xa0', u' ')
コード例 #34
0
ファイル: unshitify.py プロジェクト: guidoism/prettyweb
 def get(self):
   """Fetch the page named by the single ``url`` query argument and write
   its smartened summary followed by STYLE.

   If there is not exactly one ``url`` argument, a usage hint is written
   instead.
   """
   urls = self.get_query_arguments('url')
   if not urls or len(urls) != 1:
     self.write("Please provide ?url=[your-url]")
     return

   target = urls[0]
   doc = Document(requests.get(target).text)
   self.write(smartypants(doc.summary()))
   self.write(STYLE)
コード例 #35
0
ファイル: app.py プロジェクト: TylerFisher/euphonos2
def about():
    """Render the about page from ``posts/intro.md``.

    The file is read as UTF-8, passed through smartypants, converted from
    Markdown to HTML and placed in the template context as ``markdown``.
    """
    context = make_context()

    # The context manager closes the file; it used to be left open.
    with codecs.open("posts/intro.md", mode="r", encoding="utf-8") as f:
        contents = f.read()

    html = markdown.markdown(smartypants(contents))
    context['markdown'] = Markup(html)

    return render_template('post.html', **context)
コード例 #36
0
ファイル: app.py プロジェクト: TylerFisher/euphonos2
def about():
    """Serve the about page built from ``posts/intro.md``.

    The UTF-8 source is smartened with smartypants, rendered from Markdown
    and handed to the ``post.html`` template under the ``markdown`` key.
    """
    context = make_context()

    # Read inside a ``with`` block so the handle is always closed; the
    # original never closed it.
    with codecs.open("posts/intro.md", mode="r", encoding="utf-8") as intro:
        contents = intro.read()

    context['markdown'] = Markup(markdown.markdown(smartypants(contents)))
    return render_template('post.html', **context)
コード例 #37
0
def smartquotes(text):
    """Coerce ``text`` to unicode and curl its quotes with smartypants.

    >>> smartquotes('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    return smartypants.smartypants(unicode(text))
コード例 #38
0
 def get(self):
     """Write a cleaned-up version of the page given by the ``url`` argument.

     With exactly one ``url`` query argument the page is downloaded, its
     summary is smartened and written, followed by STYLE. Otherwise a short
     usage message is written.
     """
     urls = self.get_query_arguments('url')
     has_single_url = bool(urls) and len(urls) == 1
     if not has_single_url:
         self.write("Please provide ?url=[your-url]")
         return

     page_text = requests.get(urls[0]).text
     summary = Document(page_text).summary()
     self.write(smartypants(summary))
     self.write(STYLE)
コード例 #39
0
def smartquotes(text):
    """Return the unicode form of ``text`` with its quotes curled by smartypants.

    >>> smartquotes('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    as_unicode = unicode(text)
    return smartypants.smartypants(as_unicode)
コード例 #40
0
def fileToSentenceList(pathToTextFile):
	"""Split a text file into a list of cleaned sentence records.

	The file is read, line breaks and repeated whitespace are collapsed to
	single spaces, and the text is tokenized into sentences with nltk.
	Sentences that do not start with a capital letter or digit (optionally
	after whitespace or opening punctuation), and sentences of at most two
	words that contain a digit, are dropped. The rest are stripped,
	smartened, unescaped and passed through complete_pairs().

	Returns a list of dicts with the keys 'sentence', 'numberOfWords',
	'file' and 'randomPoint'.
	"""
	# Import string from file; the context manager closes the handle, which
	# was previously left open.
	with io.open(pathToTextFile, mode='r') as source:
		rawString = source.read().strip()

	# Basic cleaning: Replace line breaks with spaces
	def removeLineBreaks(string):
		cleanString = re.sub(r"[\n\r]+", " ", string)	 # Line breaks to spaces
		cleanString = re.sub(r"\s{2,}", " ", cleanString) # Remove double spaces
		return cleanString

	cleanString = removeLineBreaks(rawString)

	# Use nltk to tokenize sentences
	# See http://www.nltk.org/api/nltk.tokenize.html#module-nltk.tokenize
	candidates = sent_tokenize(cleanString)

	# Compiled once instead of on every iteration; a raw string keeps the
	# backslashes meant for the regex engine.
	startsWithCapital = re.compile(r'^[\s({\["\'“‘\-«»‹›]*[A-ZÄ-Ü0-9]')

	# Look at all the sentences and throw out things that we don't like
	sentences = []
	for sentence in candidates:

		# 1.
		# Throw out sentences that don't begin w/ capital letter (happens often after direct speech).
		# These are correct sentences but I prefer not to have them in the pool because they make little
		# sense without context.
		if startsWithCapital.match(sentence) is None:
			continue

		# 2.
		# Throw out one-word or two-word sentences that contain numbers
		# They are probably headlines: 'Chapter 2.' or '1.F.1.'
		if sentence.count(" ") < 2 and re.search(r"\d", sentence) is not None:
			continue

		# Remove white-space at the beginning and end
		sentence = sentence.strip()

		# Use typographically correct quotation marks, apostrophes and dashes
		sentence = HTMLParser().unescape(smartypants.smartypants(sentence))

		# Avoid unclosed (or unopened) quotation marks, parentheses, brackets, braces
		sentence = complete_pairs(sentence)

		sentences.append({
			'sentence': sentence,
			'numberOfWords': sentence.count(' ') + 1,
			'file': pathToTextFile,
			'randomPoint': [random.random(), 0] # For efficient random entry retrieval. See http://stackoverflow.com/a/9499484/836005
		})

	return sentences
コード例 #41
0
def download_story(story_id):
    """Download a story and save it as an epub in the current directory.

    Story metadata is fetched from ``API_STORYINFO`` and used to set up the
    book (title, author, cover, description, source URL). Every part that is
    neither a draft nor deleted is downloaded from ``API_STORYTEXT``, passed
    through smartypants and added as a section. The book is written to a
    path derived from its title.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo = session.get(API_STORYINFO + story_id, params={'drafts': 1, 'include_deleted': 1}).json()

    story_title = storyinfo['title']
    story_description = storyinfo['description']
    story_createDate = dateutil.parser.parse(storyinfo['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo['modifyDate'])
    story_author = storyinfo['user']['name']
    story_categories = [categories[c] for c in storyinfo['categories'] if c in categories] # category can be 0
    story_rating = storyinfo['rating'] # TODO: I think 4 is adult?
    story_cover = io.BytesIO(session.get(storyinfo['cover']).content)
    story_url = storyinfo['url']

    print('Story "{story_title}": {story_id}'.format(story_title=story_title, story_id=story_id))

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.addCover(fileobj=story_cover)
    book.impl.description = HTML(story_description, encoding='utf-8') # TODO: not sure if this is HTML or text
    book.impl.url = story_url
    book.impl.addMeta('publisher', 'Wattpad - scraped')
    book.impl.addMeta('source', story_url)

    for part in storyinfo['parts']:
        chapter_title = part['title']
        # Read the id before the skip checks: their messages use it, and it
        # used to be assigned only afterwards, so a draft or deleted part
        # either raised UnboundLocalError or reported the previous part's id.
        chapter_id = part['id']

        if part['draft']:
            print('Skipping "{chapter_title}": {chapter_id}, part is draft'.format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        if 'deleted' in part and part['deleted']:
            print('Skipping "{chapter_title}": {chapter_id}, part is deleted'.format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print('Downloading "{chapter_title}": {chapter_id}'.format(chapter_title=chapter_title, chapter_id=chapter_id))

        chapter_html = session.get(API_STORYTEXT, params={'id': chapter_id, 'output': 'json'}).json()['text']
        chapter_html = smartypants.smartypants(chapter_html)

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print('Saving epub')
    book.make('./{title}'.format(title=book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)))
コード例 #42
0
 def apply_smartypants(self, text, smarty, node):
     """Smarten ``text`` when ``node`` is a paragraph, block quote or title.

     For those nodes ``smarty`` is converted with ``_str_attr_to_int`` and
     handed to smartypants together with the text. Text belonging to any
     other node is returned as it is.
     """
     # Try to be clever about when to use smartypants
     wants_smartypants = isinstance(node, (
         docutils.nodes.paragraph,
         docutils.nodes.block_quote,
         docutils.nodes.title,
     ))
     if wants_smartypants:
         attr = _str_attr_to_int(smarty)
         return smartypants(text, attr)
     return text
コード例 #43
0
    def smarty_pants(self, text):
        """Apply smartypants to HTML text when the library is available.

        If smartypants cannot be imported the text is returned unchanged;
        the failure is silently ignored (it is not logged).
        """
        try:
            import smartypants
            result = smartypants.smartypants(text)
        except ImportError:
            # Best effort: fall back to the untouched text.
            result = text
        return result
コード例 #44
0
 def add(self, posts):
     """Add the posts and generate a blog list.

     Each post's title is replaced by its smartypants version, and one
     ``<li>`` link per post is joined with newlines into ``_blog_list``.
     The posts themselves are kept in ``_posts``.
     """
     entries = []
     for post in posts:
         smart_title = smartypants.smartypants(post.title)
         # Put the smartified title back into the post.
         post.title = smart_title
         entry = u'<li><a href="{route}">{title}</a></li>'.format(
             route=post.route, title=smart_title)
         entries.append(entry)

     self._blog_list = u'\n'.join(entries)
     self._posts = posts
コード例 #45
0
ファイル: wanja.py プロジェクト: williamjacksn/wanja
def parse_post(src_path):
    """Parse a post source file into a dict.

    The first line is the title and the second line is discarded. The
    following lines, up to the first blank one, are ``key: value`` metadata
    (keys are lower-cased). The remainder is the Markdown body, rendered to
    HTML and smartened. ``slug``, ``published`` (parsed from the metadata),
    ``year`` and ``url`` are added afterwards.
    """
    post = {}
    with codecs.open(src_path, 'r', 'utf-8') as src:
        raw_title = src.readline().strip()
        post['title'] = flask.Markup(smartypants.smartypants(raw_title))
        post['title_first_char'] = first_alpha_char(raw_title).upper()

        src.readline()  # the line after the title is not used

        # Metadata block: read stripped lines until the first empty one.
        for meta in iter(lambda: src.readline().strip(), ''):
            if ':' not in meta:
                continue
            meta_key, _, meta_val = meta.partition(':')
            post[meta_key.lower().strip()] = meta_val.strip()

        body_html = markdown.markdown(src.read())
        post['body'] = smartypants.smartypants(body_html)

    post['slug'] = get_slug_from_path(src_path)
    published = parse_datetime(post['published'])
    post['published'] = published
    post['year'] = published.year
    post['url'] = get_post_url(post)
    return post
コード例 #46
0
ファイル: utils.py プロジェクト: newsdev/nyt-scotus
 def smart_dict(self):
     """Return ``self.dict()`` with its truthy values stripped and smartened.

     Falsy values are left as they are. A value for which stripping or
     smartypants raises TypeError, UnicodeError or AttributeError is also
     left unchanged.
     """
     payload = self.dict()
     for key, value in payload.items():
         if not value:
             continue
         try:
             payload[key] = smartypants.smartypants(value.strip())
         except (TypeError, UnicodeError, AttributeError):
             # Not a convertible string; keep the original value.
             pass
     return payload
コード例 #47
0
def parse_post(src_path):
    """Read a post file and return its fields as a dict.

    Line one is the title; line two is skipped. ``key: value`` metadata
    lines follow until the first blank line, and their keys are stored
    lower-cased. Everything after that is Markdown, which is rendered and
    passed through smartypants. The slug, the parsed ``published`` date,
    its year and the post URL are then filled in.
    """
    fields = {}
    with codecs.open(src_path, 'r', 'utf-8') as src:
        raw_title = src.readline().strip()
        smarty_title = smartypants.smartypants(raw_title)
        fields['title'] = flask.Markup(smarty_title)
        fields['title_first_char'] = first_alpha_char(raw_title).upper()

        # Second line of the file carries nothing we keep.
        src.readline()

        meta = src.readline().strip()
        while meta:
            if ':' in meta:
                name, _sep, value = meta.partition(':')
                fields[name.lower().strip()] = value.strip()
            meta = src.readline().strip()

        rendered = markdown.markdown(src.read())
        fields['body'] = smartypants.smartypants(rendered)

    fields['slug'] = get_slug_from_path(src_path)
    fields['published'] = parse_datetime(fields['published'])
    fields['year'] = fields['published'].year
    fields['url'] = get_post_url(fields)
    return fields
コード例 #48
0
ファイル: typogrify.py プロジェクト: memiks/cyrax
def smartypants(text):
    """Curl the quotes in ``text`` and return the result as jinja2 Markup.

    If the smartypants library is not installed, an error is logged and the
    text is returned unchanged.

    >>> smartypants('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    try:
        import smartypants
    except ImportError:
        logger.error("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
        return text
    else:
        return jinja2.Markup(smartypants.smartypants(text))
コード例 #49
0
def smartypants(text):
    """Curl the quotes in ``text`` using the smartypants library.

    Raises TypogrifyError when the library is not installed.

    >>> smartypants('The "Green" man')
    'The &#8220;Green&#8221; man'
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
    # Reached only when the import succeeded.
    return smartypants.smartypants(text)
コード例 #50
0
def smartypants(text):
    """Curl the quotes in ``text``; return it untouched if the smartypants
    library is not installed.

    >>> smartypants('The "Green" man')
    u'The &#8220;Green&#8221; man'
    """
    try:
        import smartypants
    except ImportError:
        return text
    return smartypants.smartypants(text)
コード例 #51
0
ファイル: filters.py プロジェクト: barrysteyn/typogrify
def smartypants(text):
    """Return ``text`` with its quotes curled by the smartypants library.

    A TypogrifyError is raised if the library cannot be imported.

    >>> smartypants('The "Green" man')
    'The &#8220;Green&#8221; man'
    """
    try:
        import smartypants as library
    except ImportError:
        raise TypogrifyError("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
    else:
        return library.smartypants(text)
コード例 #52
0
ファイル: typogrify.py プロジェクト: piranha/cyrax
def smartypants(text):
    """Apply SmartyPants quote curling to ``text``.

    The result (e.g. ``The &#8220;Green&#8221; man`` for
    ``The "Green" man``) is returned as a ``jinja2.Markup`` instance. When
    the ``smartypants`` package is not importable the failure is logged via
    ``logger.error`` and the original ``text`` is returned instead.
    """
    try:
        import smartypants as curler
    except ImportError:
        logger.error("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
        return text

    converted = curler.smartypants(text)
    return jinja2.Markup(converted)
コード例 #53
0
ファイル: model.py プロジェクト: Timunroe/cfl-ticats
def munge_feed(items):
    """Normalize the raw feed fields of every post in ``items`` in place.

    For each post dict this:

    * runs ``title_api``, ``caption_api`` and ``desc_api`` (stripped)
      through smartypants; whitespace runs in ``desc_api`` are also
      collapsed to single spaces;
    * lower-cases and de-duplicates ``categories_api``, removing everything
      up to and including the last ``||`` of each entry (resulting order is
      not defined, since a set is used);
    * lower-cases the entries of ``sections_api`` when it is non-empty;
    * derives ``timestamp`` (display string) and ``timestamp_epoch``
      (whole seconds between ``pubdate_api`` and 1970-01-01) from
      ``pubdate_api``, which must look like ``%Y-%m-%dT%H:%M:%S``;
    * builds ``label_api`` from an optional "OPINION" tag, the author and
      the source (``site_api`` when ``source_api`` is empty), then
      abbreviates a few publication names.

    Returns the same ``items`` object that was passed in.
    """
    print("++++++++++\nIn munge_feed module ...")
    # Loop-invariant values: build them once rather than once per post.
    prefix_re = re.compile(r"^.*\|\|", re.IGNORECASE)
    epoch_start = datetime.datetime(1970, 1, 1)

    for post in items:
        post['title_api'] = smartypants.smartypants(post['title_api'].strip())
        post['caption_api'] = smartypants.smartypants(
            post['caption_api'].strip())
        post['categories_api'] = list(
            {prefix_re.sub('', x.lower()) for x in post['categories_api']})
        if post['sections_api']:
            post['sections_api'] = [x.lower() for x in post['sections_api']]
        desc = smartypants.smartypants(post['desc_api'].strip())
        post['desc_api'] = " ".join(desc.split())

        date_object = datetime.datetime.strptime(post['pubdate_api'],
                                                 '%Y-%m-%dT%H:%M:%S')
        stamp = date_object.strftime('%b %d %I:%M %p')
        # Drop zero padding, spell out the short month names that have a
        # longer form, and hide ":00" minutes.
        stamp = stamp.replace(' 0', ' ')
        stamp = stamp.replace('Jul', 'July').replace('Apr', 'April')
        stamp = stamp.replace('Mar', 'March').replace('Jun', "June")
        post['timestamp'] = stamp.replace(':00', '')
        post['timestamp_epoch'] = int(
            (date_object - epoch_start).total_seconds())

        # sections_api may be falsy (see the guard above); a None value
        # must not make the membership test raise TypeError.
        sections = post['sections_api'] or ()
        label_start = "OPINION" if "opinion" in sections else ""
        label_end = post['source_api'] or post['site_api']

        # str_len_check is defined elsewhere; presumably it yields 0 or 1 so
        # that an empty part (and its separator) is left out -- confirm.
        label = (
            (label_start + ' | ') * str_len_check(label_start)) + (
                (post['author_api'] + ' | ') *
                str_len_check(post['author_api'])) + label_end
        post['label_api'] = label.replace(
            "The Hamilton Spectator",
            "The Spec").replace("Hamilton Spectator",
                                "The Spec").replace("Toronto Star", "The Star")
    return items
コード例 #54
0
ファイル: model.py プロジェクト: Timunroe/world_cup
def parse_form(form_data, kind="list"):
    """Apply the edits submitted in ``form_data`` to the TinyDB store.

    Args:
        form_data: Form payload; it is converted with ``dict()``. Values
            may be a single string or a list of strings.
        kind: ``"list"`` (default) means the data comes from the 'lineup'
            page: every non-empty value under any key except ``action``
            has the form ``<asset_id>__<field>__<integer>`` and results in
            one update of that field on that asset. Any other value means
            the 'item' page: the first element of ``asset_id`` selects one
            asset, and the first elements of ``draft_user``, ``rank``,
            ``rank_time`` (stored as ints) and ``label_user``,
            ``title_user``, ``desc_user`` (stripped, passed through
            smartypants) are written to it when not empty.

    Returns:
        None.

    Raises:
        ValueError: if a value cannot be split into exactly three parts or
            an integer field is not numeric. The database is closed before
            the exception propagates.
    """
    db = TinyDB(cfg.config['db_name'])
    # try/finally so the database handle is released even when a malformed
    # value makes split()/int() raise part-way through.
    try:
        Record = Query()
        print("incoming form data:")
        # form data will have keys, values that may be lists or a single string.
        form_data_dict = dict(form_data)
        print(form_data_dict)
        if kind == 'list':
            # form data is coming from the 'lineup' page,
            # which can have multiple changes on multiple assets
            for k, v in form_data_dict.items():
                if k == "action":
                    continue
                # Treat a lone string like a one-element list so both
                # shapes share the same update path.
                entries = v if isinstance(v, list) else [v]
                for item in entries:
                    # skip empty strings
                    if not item:
                        continue
                    asset_id, field, new_value = item.split('__')
                    print(
                        f"++++++++\nSetting this item: {asset_id} to {field}: {new_value}\n++++++++"
                    )
                    db.update({field: int(new_value)},
                              Record.asset_id == asset_id)
        else:
            # form data is coming from the 'item' page instead,
            # multiple changes possible but only 1 asset affected
            post_update = {}
            asset_id = form_data_dict['asset_id'][0]
            for x in ['draft_user', 'rank', 'rank_time']:
                if form_data_dict[x][0] != '':
                    post_update[x] = int(form_data_dict[x][0])
            for x in ['label_user', 'title_user', 'desc_user']:
                if form_data_dict[x][0] != '':
                    post_update[x] = smartypants.smartypants(
                        form_data_dict[x][0].strip())
            print("Data to update:")
            print(post_update)
            db.update(post_update, Record.asset_id == asset_id)
    finally:
        db.close()
コード例 #55
0
ファイル: render_utils.py プロジェクト: eads/lookatthis
def smarty_filter(s):
    """
    Filter to smartypants strings.

    ``Markup`` input is unescaped first; anything that is not exactly a
    ``unicode`` object is then coerced to ``unicode`` before being passed
    to ``smartypants``. The processed text is returned as ``Markup``.
    """
    # The previous check, ``type(s) == 'Markup'``, compared a class with a
    # string and was never true, so Markup input was never unescaped.
    if isinstance(s, Markup):
        s = s.unescape()

    # Evaluate COPY elements
    if type(s) is not unicode:
        s = unicode(s)

    s = smartypants(s)

    return Markup(s)
コード例 #56
0
 def smartypants(text):
     """Curl quotes in ``text`` using SmartyPants with a reduced flag set.

     The library is called with ``Attr.set1`` minus the ``Attr.mask_d``
     bits (presumably this switches off dash conversion -- confirm against
     the smartypants ``Attr`` documentation). Raises ``TypogrifyError``
     when the ``smartypants`` package cannot be imported.

     >>> smartypants('The "Green" man')
     'The &#8220;Green&#8221; man'
     """
     try:
         import smartypants as sp_module
     except ImportError:
         raise TypogrifyError("Error in {% smartypants %} filter: ")

     from smartypants import Attr
     flags = Attr.set1 & ~Attr.mask_d
     return sp_module.smartypants(text, flags)
コード例 #57
0
def parse_transcript(path):
    """
    Parse a Premiere .tsv file, calculate total seconds and write to JSON for use in the app.

    The UTF-16 file at ``path`` is read as tab-separated rows. The first
    row is treated as a header and skipped, and blank rows are ignored.
    Every other row contributes one ``{'time': ..., 'transcript': ...}``
    entry: the text is passed through ``smartypants`` and the
    ``HH:MM:SS:FF`` timecode is converted to seconds, counting 24 frames
    per second. The result is written to ``www/data/<name>.json``, where
    ``<name>`` is the file name of ``path`` without its last extension.
    """
    data = {'subtitles': []}
    # Strip only the final extension: unpacking split('.') into exactly two
    # names raised ValueError for file names with no dot or several dots.
    filename = path.split('/')[-1].rsplit('.', 1)[0]

    with codecs.open(path, 'rb', encoding='utf16') as f:
        transcript = f.read().encode('utf-8')
        tab_reader = csv.reader(BytesIO(transcript), delimiter='\t')
        # Skip the header row; the default avoids StopIteration on an
        # empty file.
        next(tab_reader, None)
        for row in tab_reader:
            if not row:
                # csv yields an empty list for a blank line
                continue

            # Premiere exports kind of suck: some rows start with an empty
            # cell, which shifts the text and timecode one column right.
            if row[0] == '':
                words = smartypants(row[1].strip())
                time_str = row[2]
            else:
                words = smartypants(row[0].strip())
                time_str = row[1]

            hours, minutes, seconds, frame = [
                int(x) for x in time_str.split(':')
            ]

            # Frames become a fraction of a second (24 frames per second).
            fraction = float(frame) / 24
            total_seconds = (hours * 3600) + (minutes * 60) + (seconds +
                                                               fraction)

            data['subtitles'].append({
                'time': total_seconds,
                'transcript': words,
            })

    with open('www/data/%s.json' % filename, 'w') as wf:
        wf.write(json.dumps(data))