def parse_transcript(path):
    """Parse a Premiere .tsv transcript export into a JSON subtitle file.

    Reads the UTF-16 encoded .tsv at ``path``, converts each row's
    timecode to total seconds, and writes ``{'subtitles': [...]}`` to
    ``www/data/<basename>.json``.

    NOTE: Python 2 only (``tab_reader.next()``, ``BytesIO`` over the
    byte string produced by ``.encode()``).
    """
    data = {'subtitles': []}
    # Output basename comes from the input filename (breaks if the name
    # contains more than one dot).
    filename, ext = path.split('/')[-1].split('.')
    with codecs.open(path, 'rb', encoding='utf16') as f:
        transcript = f.read().encode('utf-8')
    tab_reader = csv.reader(BytesIO(transcript), delimiter='\t')
    headers = tab_reader.next()  # skip the header row
    for row in tab_reader:
        # Premiere exports kind of suck: some rows have an empty first
        # column, shifting text/timecode one column to the right.
        if row[0] == '':
            words = smartypants(row[1].strip())
            time_str = row[2]
        else:
            words = smartypants(row[0].strip())
            time_str = row[1]
        # Timecode is HH;MM;SS;FF (drop-frame-style ';' separators here;
        # a sibling version of this function splits on ':' -- confirm
        # which separator the export actually uses).
        hours, minutes, seconds, frame = [int(x) for x in time_str.split(';')]
        # assumes 24 fps footage -- TODO confirm
        decimal = (float(frame) / 24)
        total_seconds = (hours * 3600) + (minutes * 60) + (seconds + decimal)
        segment = {
            'time': total_seconds,
            'transcript': words,
        }
        data['subtitles'].append(segment)
    with open('www/data/%s.json' % filename, 'w') as wf:
        wf.write(json.dumps(data))
def publish_index_pages(self):
    """Render all posts into paginated index pages.

    Posts are grouped into pages of ``self.s.POSTS_PER_PAGE``.  Page 0 is
    written to ``self.s.INDEX_PAGE``; later pages are named
    ``<index>-<n><HTML_EXT>``.  Each page receives prev/next pagination
    URLs (None at the ends).
    """
    # FIX: build the Post list from a single fetch -- the original called
    # get_all_file_posts_by_date() twice and left the first result unused.
    posts = [Post(f, self.s) for f in self.get_all_file_posts_by_date()]
    for post in posts:
        post.content = smartypants(self.md_to_html(post.content))
        post.title = smartypants(post.title)

    first_post = 0
    last_post = first_post + self.s.POSTS_PER_PAGE
    page_number = 0
    dest_fname = self.s.INDEX_PAGE
    dest_dir = self.s.WWW_DIR
    prev_page_url = None
    next_page_url = None

    while first_post < len(posts):
        p = posts[first_post:last_post]
        if page_number == 0:
            local_fname = dest_fname
        else:
            local_fname = "%s-%d%s" % (os.path.splitext(dest_fname)[0],
                                       page_number,
                                       self.s.HTML_EXT)
        # Pagination: a next page exists only if posts remain after this slice.
        if len(posts) <= last_post:
            next_page_url = None
        else:
            next_page_url = "%s-%d%s" % (os.path.splitext(self.s.INDEX_PAGE)[0],
                                         page_number + 1,
                                         self.s.HTML_EXT)
        if first_post - self.s.POSTS_PER_PAGE < 0:
            # No previous page before the first one.
            prev_page_url = None
        else:
            if page_number == 1:
                prev_page_url = self.s.INDEX_PAGE
            else:
                # FIX: use HTML_EXT for consistency with next_page_url
                # (was a hard-coded ".html").
                prev_page_url = "%s-%d%s" % (
                    os.path.splitext(self.s.INDEX_PAGE)[0],
                    page_number - 1,
                    self.s.HTML_EXT)
        self.write_posts_to_file(
            posts=p,
            fname=local_fname,
            dir=dest_dir,
            template=self.s.INDEX_TEMPLATE,
            prev_page_url=prev_page_url,
            next_page_url=next_page_url,
        )
        logging.info("Wrote posts %d-%d to %s." %
                     (first_post, last_post, local_fname))
        first_post = last_post
        last_post = first_post + self.s.POSTS_PER_PAGE
        page_number = page_number + 1
def prepare_post(self, post):
    """Smarten the post's title and convert its Markdown body to HTML."""
    post.title = smartypants(post.title)
    rendered = markdown.markdown(
        post.content,
        extensions=self.s.MD_EXTENSIONS,
        extension_configs=self.s.MD_EXTENSION_CONFIGS,
        output_format=self.s.MD_OUTPUT_FORMAT,
    )
    post.content = smartypants(rendered)
def __init__(self, **kwargs):
    """Populate the post's fields from keyword metadata.

    Raises KeyError when a required field is missing.
    """
    smarten = smartypants.smartypants
    self.date = kwargs['date']
    self.source_file = kwargs['source_file']
    # Human-visible text gets typographic quotes and dashes.
    self.summary = smarten(kwargs['summary'])
    self.title = smarten(kwargs['title'])
    self.route = kwargs['route']
    self.url = kwargs['url']
    # Holding every post lets this one look up its relationships.
    self._posts = kwargs['posts']
def __init__(self, **kwargs):
    """Build the post record from keyword metadata (KeyError if absent)."""
    self.date = kwargs["date"]
    self.source_file = kwargs["source_file"]
    # Prettify the fields readers actually see.
    self.summary = smartypants.smartypants(kwargs["summary"])
    self.title = smartypants.smartypants(kwargs["title"])
    self.route = kwargs["route"]
    self.url = kwargs["url"]
    # Keeping the full post list enables relationship lookups later.
    self._posts = kwargs["posts"]
def smartquotes(self, text: str) -> str:
    """If enabled, apply 'smart quotes' to the text; replaces quotes and
    dashes by nicer looking symbols.

    Returns *text* unchanged when smart quotes are unsupported/disabled.
    """
    if self.supports_smartquotes and self.do_smartquotes:
        attrs = (smartypants.Attr.q | smartypants.Attr.B |
                 smartypants.Attr.D | smartypants.Attr.e)
        if hasattr(smartypants.Attr, "u"):
            # Attr.u avoids HTML-entity creation.
            return smartypants.smartypants(text, attrs | smartypants.Attr.u)
        # Older smartypants lacks attribute 'u', so undo the entity
        # creation ourselves.
        txt = smartypants.smartypants(text, attrs)
        import html
        # BUG FIX: html.parser has no module-level unescape() -- and
        # HTMLParser.unescape was removed in Python 3.9.  html.unescape()
        # is the supported API.
        return html.unescape(txt)
    return text
def make_item_content_encoded(self, text1, text2, url, comment_name):
    """
    Called from item_content_encoded() in children.
    text1 and text2 are chunks of HTML text (or empty strings).
    url is the URL of the item (no domain needed, eg '/diary/1666/10/31/').
    comment_name is one of 'comment' or 'annotation'.
    """
    smart1 = force_unicode(smartypants.smartypants(text1))
    smart2 = force_unicode(smartypants.smartypants(text2))
    full_url = add_domain(Site.objects.get_current().domain, url)
    return '%s %s <p><strong><a href="%s#%ss">Read the %ss</a></strong></p>' % (
        smart1, smart2, full_url, comment_name, comment_name)
def render_gist(request, id, raw):
    """Render a raw gist and store it.

    Builds a gist record from the GitHub API payload ``raw``, renders each
    file whose language is in RENDERABLE (Markdown via the GitHub render
    API, reStructuredText locally), and upserts the record into the
    ``gists`` RethinkDB table.  Returns the stored record.

    NOTE: Python 2 only (uses ``unicode``).
    """
    gist = {
        'id': raw['id'],
        'html_url': raw['html_url'],
        'public': raw['public'],
        'description': raw['description'],
        'created_at': iso8601.parse_date(raw['created_at']),
        'updated_at': iso8601.parse_date(raw['updated_at']),
        'fetched_at': rdb.now(),
        'author_id': raw['user']['id'],
        'author_login': raw['user']['login'],
        'files': [],
    }
    for gistfile in raw['files'].values():
        # Skip files in languages we don't know how to render.
        format = RENDERABLE.get(gistfile['language'], None)
        if format is None:
            continue
        output = None
        if format is FORMAT_MD:
            # GitHub-flavoured markdown is rendered by the GitHub API.
            payload = {
                'mode': 'gfm',
                'text': gistfile['content'],
            }
            req_render = requests.post('https://api.github.com/markdown',
                                       params=GITHUB_AUTH_PARAMS,
                                       data=unicode(json.dumps(payload)))
            if req_render.status_code != 200:
                logger.warn('Render {} file {} failed: {}'.format(
                    id, gistfile['filename'], req_render.status_code))
                continue
            else:
                output = smartypants.smartypants(req_render.text)
        if format is FORMAT_RST:
            # reStructuredText is rendered locally.
            rendered = render_rst(gistfile['content'],
                                  writer_name='html')['fragment']
            output = smartypants.smartypants(rendered)
        if output is not None:
            gistfile['rendered'] = output
            gist['files'].append(gistfile)
    rdb.table('gists').insert(gist, upsert=True).run(request.rdbconn)
    return gist
def render_gist(request, id, raw):
    """Render a raw gist and store it.

    From the GitHub API payload ``raw``: copies the gist metadata, renders
    every file whose language appears in RENDERABLE (Markdown through the
    GitHub markdown API, reStructuredText locally), and upserts the
    resulting record into the ``gists`` RethinkDB table before returning it.

    NOTE: Python 2 only (uses ``unicode``).
    """
    gist = {
        'id': raw['id'],
        'html_url': raw['html_url'],
        'public': raw['public'],
        'description': raw['description'],
        'created_at': iso8601.parse_date(raw['created_at']),
        'updated_at': iso8601.parse_date(raw['updated_at']),
        'fetched_at': rdb.now(),
        'author_id': raw['user']['id'],
        'author_login': raw['user']['login'],
        'files': [],
    }
    for gistfile in raw['files'].values():
        # Only render languages we have a formatter for.
        format = RENDERABLE.get(gistfile['language'], None)
        if format is None:
            continue
        output = None
        if format is FORMAT_MD:
            # GitHub-flavoured markdown goes through the GitHub API.
            payload = {
                'mode': 'gfm',
                'text': gistfile['content'],
            }
            req_render = requests.post('https://api.github.com/markdown',
                                       params=GITHUB_AUTH_PARAMS,
                                       data=unicode(json.dumps(payload)))
            if req_render.status_code != 200:
                logger.warn('Render {} file {} failed: {}'.format(id, gistfile['filename'], req_render.status_code))
                continue
            else:
                output = smartypants.smartypants(req_render.text)
        if format is FORMAT_RST:
            # reStructuredText renders locally via docutils.
            rendered = render_rst(gistfile['content'],
                                  writer_name='html')['fragment']
            output = smartypants.smartypants(rendered)
        if output is not None:
            gistfile['rendered'] = output
            gist['files'].append(gistfile)
    rdb.table('gists').insert(gist, upsert=True).run(request.rdbconn)
    return gist
def convert_entities(obj):
    """Recursively smarten and ASCII-encode all unicode values in a dict.

    Walks ``obj`` in place: unicode values are run through smartypants and
    encoded to ASCII with XML character references; nested dict values are
    processed recursively.  Other value types are left untouched.

    FIX: the original recursed into *every* non-unicode value and crashed
    with AttributeError on anything lacking ``iterkeys`` (lists, ints,
    byte strings); now only dicts are recursed into.

    NOTE: Python 2 only (``iterkeys``, ``unicode``).
    """
    for key in obj.iterkeys():
        value = obj[key]
        if isinstance(value, unicode):
            smart = smartypants(value)
            # Non-ASCII characters become XML entities (&#8217; etc.).
            obj[key] = smart.encode('ascii', 'xmlcharrefreplace')
        elif isinstance(value, dict):
            convert_entities(value)
def prettify_title(title):
    """Sanitizes all (ALL) HTML elements in titles while prettifying the
    quotes and dashes used in the titles of threads and categories.
    """
    smartened = smartypants(title, "2")
    return bleach.clean(smartened, tags=[], attributes={})
def webfix_unicode(possible_string):
    """
    This is ugly but it will create Times-approved HTML out of terrible
    cut-and-paste from decision text.

    Non-string input is returned unchanged.

    NOTE: Python 2 only (``basestring``, ``unicode``).
    """
    # Map problem characters to their approved replacements.
    # NOTE(review): the replacement values appear to be literal glyphs --
    # confirm they weren't originally HTML entities (&sect;, &mdash;, ...)
    # mangled by an encoding round-trip.
    CHAR_MAP = [
        (u'\xa7', u'§'),
        (u'\u2014', u'—'),
        (u'\u2013', u'–'),
        (u'\x97', u'—'),
        (u'\xa4', u'€'),
        (u'\u201c', u'"'),
        (u'\u201d', u'"'),
        (u'\x96', u'–'),
    ]
    if isinstance(possible_string, basestring):
        string = possible_string
        string = string.strip()
        for char, replace_char in CHAR_MAP:
            string = string.replace(char, replace_char)
        # NOTE(review): calling .decode('utf-8') on a *unicode* value in
        # Python 2 implicitly ASCII-encodes first and raises for non-ASCII
        # text -- confirm callers only pass byte strings here.
        string = string.decode('utf-8')
        string = unicode(string)
        string = ftfy.fix_text(string)
        string = smartypants.smartypants(string)
        return string
    return possible_string
def generate(self, markup=None):
    """Generate HTML

    >>> class Handler(BaseHandler):
    ...     def _generate(self, markup=None): return markup
    >>> handler = Handler(None)
    >>> print(handler.generate('foo "bar"'))
    foo "bar"
    >>> handler.options['smartypants'] = True
    >>> print(handler.generate('foo "bar"'))
    foo “bar”
    """
    source = self.markup if markup is None else markup
    html = self._generate(source)

    wants_smarty = self.options.get('smartypants', False)
    if wants_smarty and not HAS_SMARTYPANTS:
        # Requested but unavailable: warn and return the raw HTML.
        warnings.warn(
            "smartypants option is set, "
            "but the library isn't installed.",
            RuntimeWarning)
        return html
    if wants_smarty:
        attr_flags = smartypants.Attr
        html = smartypants.smartypants(html, attr_flags.set1 | attr_flags.w)

    if self.SUPPORT_EMBED_IMAGES and self.options.get('embed_images', False):
        html = self.embed_images(html)
    return html
def apply_smartypants(self, text, smarty, node):
    """Apply smartypants to *text* when *node* is prose-like.

    Only paragraph, block-quote and title nodes get smart typography;
    any other node type is returned unchanged.
    """
    # FIX: use isinstance() rather than exact-class membership so
    # subclasses of these node types are also smartened -- consistent
    # with the other apply_smartypants implementations in this codebase.
    if isinstance(node, (docutils.nodes.paragraph,
                         docutils.nodes.block_quote,
                         docutils.nodes.title)):
        return smartypants(text, smarty)
    return text
def webfix_unicode(possible_string):
    """
    This is ugly but it will create Times-approved HTML out of terrible
    cut-and-paste from decision text.

    Non-string input is returned unchanged.
    """
    # NOTE(review): the replacement values appear to be literal glyphs --
    # confirm they weren't originally HTML entities mangled in transit.
    character_map = [
        ('\xa7', '§'),
        ('\u2014', '—'),
        ('\u2013', '–'),
        ('\x97', '—'),
        ('\xa4', '€'),
        ('\u201c', '"'),
        ('\u201d', '"'),
        ('\x96', '–'),
    ]
    if isinstance(possible_string, str):
        string = possible_string.strip()
        for char, replace_char in character_map:
            string = string.replace(char, replace_char)
        # BUG FIX: removed ``string = string.decode('utf-8')`` -- str has
        # no .decode() in Python 3, so that line always raised
        # AttributeError.
        string = ftfy.fix_text(string)
        string = smartypants.smartypants(string)
        return string
    return possible_string
def content_transform(name, xmltext, child=None, params={}):
    """Transform an XML text blob into HTML via the module's XSLT transform.

    name: label used when reporting XSLT messages (substituted when the
        parser reports the pseudo-filename '<string>').
    xmltext: the XML document as a text string.
    child: optional tag name; when given, only that child element of the
        parsed document is transformed.
    params: extra XSLT string parameters.  The mutable default is safe
        here because the dict is copied before mutation.
    """
    #print("XSLT: %.80s(%s) %r" % (xmltext.replace('\n', ' '), child or '-', params.get('blogmode', '')))
    f = BytesIO(xmltext.encode('utf-8'))
    try:
        doc = etree.parse(f)
    except:
        # Surface the offending text before re-raising the parse error.
        print("Text was {!r}".format(xmltext))
        raise
    if child:
        doc = doc.find(child)
    # Copy before mutating so the caller's dict (and the default) stay clean.
    params = dict(params)
    params.update({
        'base': string_param(settings.BASE),
    })
    html = str(xslt_xform(doc, **params))
    # smartypants doesn't handle </a>' properly.
    html = re.sub(r"(</\w+>)'", r"\1’", html)
    html = smartypants.smartypants(html, smartypants.Attr.q | smartypants.Attr.n)
    #print("Transformed {!r} into {!r}".format(xmltext[:80], html[:80]))
    # Echo any messages the XSLT produced, mapping '<string>' to `name`.
    for entry in xslt_xform.error_log:
        if entry.filename == '<string>':
            fname = name
        else:
            fname = entry.filename
        print("Message, %s @ %d: %s" % (fname, entry.line, entry.message))
    return html
def prettify_text(self, text):
    """
    Make text more nicerer.  Run it through SmartyPants and Widont.
    """
    widonted = self.widont(text)
    return smartypants.smartypants(widonted)
def smartypants(text):
    """Curl straight quotes in *text* into typographic ones.

    >>> smartypants('The "Green" man')
    u'The “Green” man'
    """
    result = _smartypants.smartypants(text)
    return result
def publish_pages(self, posts=None, force_publish=False):
    """Render standalone pages to HTML files.

    posts: Post objects to publish; replaced by every markdown file in
        PAGES_DIR when force_publish is True.
    force_publish: when True, rebuild and publish all pages from disk.
    """
    if force_publish:
        posts = [Post(os.path.join(self.s.PAGES_DIR, f), self.s)
                 for f in os.listdir(self.s.PAGES_DIR)
                 if f.endswith(self.s.MD_EXT)]
    # FIX: calling with the default posts=None (and force_publish=False)
    # used to raise TypeError when iterating None; treat it as "nothing
    # to publish".
    for post in (posts or []):
        post.content = smartypants(self.md_to_html(post.content))
        post.title = smartypants(post.title)
        html_fname = "%s%s" % (post.slug, self.s.HTML_EXT)
        html_dir = os.path.join(self.s.WWW_DIR, self.s.WWW_PAGES_URL)
        html_full_path = os.path.join(html_dir, html_fname)
        tools.mkdirp(html_dir)  # TODO: check dir owner/permission
        self.write_single_post_to_file(post=post,
                                       fname=html_full_path,
                                       template=self.s.PAGES_TEMPLATE)
def _smartypants(text):
    """Smarten punctuation in *text*.

    Applies backtick-quote (b), dash (D) and ellipsis (e) conversions,
    then runs a chain of character replacements.

    NOTE(review): the replace() pairs appear to map each character to
    itself; they were probably HTML entities (&#8211; etc.) before an
    encoding round-trip -- confirm against version control.
    """
    attrs = (smartypants.Attr.b | smartypants.Attr.D | smartypants.Attr.e)
    return (smartypants.smartypants(text, attrs).replace(
        "–", "–").replace("—", "—").replace("…", "…").replace("“", "“").replace("”", "”"))
def _to_smart(verse):
    """Smarten a verse: backticks become apostrophes, then the
    smartypants output is unescaped back to literal characters."""
    text = verse.replace(",`", ", '").replace("`", "'")
    smartened = smartypants(text)
    return HTMLParser().unescape(smartened)
def clean_typography(self, text):
    """Smarten *text* and normalise whitespace/entity artefacts.

    NOTE(review): several replace() pairs look like self-replacements;
    they were probably HTML entities (&rsquo;, &ldquo;, &nbsp;, ...)
    before an encoding round-trip -- confirm against version control.
    """
    return smartypants.smartypants(text).\
        replace(" ", "").\
        replace(" ", " ").\
        replace(u'’', u'’').\
        replace(u'“', u'“').\
        replace(u'”', u'”').\
        replace(u'\xa0 ', u' ').replace(u' \xa0', u' ')
def smartypants_wrapper(text):
    """Template filter: smarten *text*, preserving any AMP shadow copy.

    Raises TypogrifyError when the smartypants library is not installed.
    """
    try:
        import smartypants
    except ImportError:
        from typogrify.filters import TypogrifyError
        raise TypogrifyError(
            "Error in {% smartypants %} filter: The Python smartypants "
            "library isn't installed."
        )

    attr = smartypants.default_smartypants_attr | smartypants.Attr.w
    content = smartypants.smartypants(text, attr=attr)
    if isinstance(text, AMPString):
        # Carry the AMP variant through, smartened with the same flags.
        wrapped = AMPString(content)
        wrapped.amp_data = smartypants.smartypants(text.amp_data, attr=attr)
        return wrapped
    return content
def smartypants_filter(text):
    """Smarty pants: curl quotes in *text*; any falsy input yields ''."""
    if not text:
        return ''
    return smartypants(text)
def smartypants_wrapper(text):
    """Apply smart typography to *text* (and to its AMP shadow copy)."""
    try:
        import smartypants
    except ImportError:
        from typogrify.filters import TypogrifyError
        raise TypogrifyError(
            "Error in {% smartypants %} filter: The Python smartypants "
            "library isn't installed.")
    else:
        flags = smartypants.default_smartypants_attr | smartypants.Attr.w
        smart = smartypants.smartypants(text, attr=flags)
        if isinstance(text, AMPString):
            result = AMPString(smart)
            result.amp_data = smartypants.smartypants(text.amp_data,
                                                      attr=flags)
            smart = result
        return smart
def apply_smartypants(self, text, smarty, node):
    """Smarten *text* only for prose nodes (paragraph, block quote, title)."""
    prose_nodes = (docutils.nodes.paragraph,
                   docutils.nodes.block_quote,
                   docutils.nodes.title)
    if not isinstance(node, prose_nodes):
        return text
    return smartypants(text, _str_attr_to_int(smarty))
def _to_smart(verse):
    """Convert backtick quoting to apostrophes, smarten the verse, and
    unescape the resulting HTML entities back to literal characters."""
    parser = HTMLParser()
    cleaned = verse.replace(",`", ", '")
    cleaned = cleaned.replace("`", "'")
    return parser.unescape(smartypants(cleaned))
def clean_typography(text):
    """Smarten *text* and normalise whitespace/entity artefacts.

    NOTE(review): several replace() pairs look like self-replacements;
    they were probably HTML entities (&rsquo;, &ldquo;, &nbsp;, ...)
    before an encoding round-trip -- confirm against version control.
    """
    return smartypants.smartypants(text).\
        replace(" ", " ").\
        replace(" ", " ").\
        replace(u'’', u'’').\
        replace(u'“', u'“').\
        replace(u'”', u'”').\
        replace(u'\xa0 ', u' ').replace(u' \xa0', u' ')
def get(self):
    """Serve a cleaned, smartened summary of the page at ?url=... ."""
    urls = self.get_query_arguments('url')
    if len(urls) == 1:
        # Exactly one URL given: fetch it and extract the readable body.
        doc = Document(requests.get(urls[0]).text)
        self.write(smartypants(doc.summary()))
        self.write(STYLE)
    else:
        self.write("Please provide ?url=[your-url]")
def about():
    """Render posts/intro.md as the About page.

    Returns the 'post.html' template rendered with the markdown-converted,
    smartened contents of the intro post.
    """
    context = make_context()
    # FIX: use a context manager so the file handle is closed (the
    # original opened it and never closed it).
    with codecs.open("posts/intro.md", mode="r", encoding="utf-8") as f:
        contents = f.read()
    html = markdown.markdown(smartypants(contents))
    context['markdown'] = Markup(html)
    return render_template('post.html', **context)
def smartquotes(text):
    """Applies smarty pants to curl quotes.

    >>> smartquotes('The "Green" man')
    u'The “Green” man'
    """
    return smartypants.smartypants(unicode(text))
def fileToSentenceList(pathToTextFile):
    """Read a text file and return a list of cleaned sentence records.

    Each record is a dict holding the sentence text, its word count, the
    source file path, and a random point used for efficient random entry
    retrieval.  Sentences that don't start with a capital letter, or that
    look like numeric headlines, are filtered out.
    """
    # Import string from file
    file = io.open(pathToTextFile, mode='r', buffering=-1, encoding=None,
                   errors=None, newline=None, closefd=True)
    rawString = file.read().strip()

    # Basic cleaning: Replace line breaks with spaces
    def removeLineBreaks(string):
        cleanString = re.sub("[\n\r]+", " ", string)  # Line breaks to spaces
        cleanString = re.sub("\s{2,}", " ", cleanString)  # Remove double spaces
        return cleanString
    cleanString = removeLineBreaks(rawString);

    # Use nltk to tokenize sentences
    # See http://www.nltk.org/api/nltk.tokenize.html#module-nltk.tokenize
    sentences = sent_tokenize(cleanString)

    # Look at all the sentences and throw out things that we don't like
    buffer = sentences
    sentences = []
    for sentence in buffer:
        # 1.
        # Throw out words that don't begin w/ capital letter (happens often
        # after direct speech).  These are correct sentences but I prefer
        # not to have them in the pool because they make little sense
        # without context.
        regex = '^[\s({\["\'“‘\-«»‹›]*[A-ZÄ-Ü0-9]'
        match = re.match(regex, sentence)
        if match is None:
            #print "thrown out b/c sentence doesn't start w/ capital letter: ", sentence
            continue
        # 2.
        # Throw out one-word or two-word sentences that contain numbers
        # They are probably headlines: 'Chapter 2.' or '1.F.1.'
        if sentence.count(" ") < 2 and re.search("\d", sentence) is not None:
            #print "thrown out b/c it seems like a nonsensical headline:", sentence
            continue
        # Remove white-space at the beginning and end
        sentence = sentence.strip()
        # Use typographically correct quotation marks, apostrophes and dashes
        sentence = HTMLParser().unescape(smartypants.smartypants(sentence))
        # Avoid unclosed (or unopened) quotation marks, parentheses,
        # brackets, braces
        sentence = complete_pairs(sentence)
        sentences.append({
            'sentence': sentence,
            'numberOfWords': sentence.count(' ') + 1,
            'file': pathToTextFile,
            # For efficient random entry retrieval.
            # See http://stackoverflow.com/a/9499484/836005
            'randomPoint': [random.random(), 0]
        })
    return sentences
def download_story(story_id):
    """Download a Wattpad story and save it as an EPUB.

    Fetches story metadata and every published (non-draft, non-deleted)
    part via the Wattpad API, smartens each chapter's HTML, and builds an
    ez_epub book written to the current directory.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo = session.get(API_STORYINFO + story_id,
                            params={'drafts': 1, 'include_deleted': 1}).json()

    story_title = storyinfo['title']
    story_description = storyinfo['description']
    story_createDate = dateutil.parser.parse(storyinfo['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo['modifyDate'])
    story_author = storyinfo['user']['name']
    # category can be 0
    story_categories = [categories[c] for c in storyinfo['categories']
                        if c in categories]
    story_rating = storyinfo['rating']  # TODO: I think 4 is adult?
    story_cover = io.BytesIO(session.get(storyinfo['cover']).content)
    story_url = storyinfo['url']

    print('Story "{story_title}": {story_id}'.format(story_title=story_title,
                                                     story_id=story_id))

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.addCover(fileobj=story_cover)
    # TODO: not sure if this is HTML or text
    book.impl.description = HTML(story_description, encoding='utf-8')
    book.impl.url = story_url
    book.impl.addMeta('publisher', 'Wattpad - scraped')
    book.impl.addMeta('source', story_url)

    for part in storyinfo['parts']:
        chapter_title = part['title']
        # BUG FIX: chapter_id must be read *before* the draft/deleted skip
        # messages that reference it -- it was previously assigned after
        # them, so the first skipped part raised NameError (or reused the
        # previous part's id).
        chapter_id = part['id']

        if part['draft']:
            print('Skipping "{chapter_title}": {chapter_id}, part is draft'.format(
                chapter_title=chapter_title, chapter_id=chapter_id))
            continue
        if 'deleted' in part and part['deleted']:
            print('Skipping "{chapter_title}": {chapter_id}, part is deleted'.format(
                chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print('Downloading "{chapter_title}": {chapter_id}'.format(
            chapter_title=chapter_title, chapter_id=chapter_id))
        chapter_html = session.get(API_STORYTEXT,
                                   params={'id': chapter_id,
                                           'output': 'json'}).json()['text']
        chapter_html = smartypants.smartypants(chapter_html)

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print('Saving epub')
    book.make('./{title}'.format(
        title=book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)))
def apply_smartypants(self, text, smarty, node):
    """Run smartypants on *text* when *node* is a paragraph, block quote
    or title; other node types pass through untouched."""
    if isinstance(node, (docutils.nodes.paragraph,
                         docutils.nodes.block_quote,
                         docutils.nodes.title)):
        attr = _str_attr_to_int(smarty)
        return smartypants(text, attr)
    return text
def smarty_pants(self, text):
    """Applies smarty pants to html text; returns *text* unchanged when
    the smartypants library is unavailable."""
    try:
        import smartypants
    except ImportError:
        # this should be logged maybe??? Right now, silently ignored
        return text
    return smartypants.smartypants(text)
def add(self, posts):
    """Add the posts and generate a blog list."""
    items = []
    for post in posts:
        # Put the smartified title back into the post.
        post.title = smartypants.smartypants(post.title)
        items.append(u'<li><a href="{route}">{title}</a></li>'.format(
            route=post.route, title=post.title))
    self._blog_list = u'\n'.join(items)
    self._posts = posts
def parse_post(src_path):
    """Parse a markdown post file into a dict of post attributes.

    Expected file layout: first line is the title, one separator line,
    then ``Key: value`` metadata lines until a blank line, then the
    markdown body.  Returns a dict containing the smartened title, the
    title's first alphabetic character (upper-cased, for indexing), any
    metadata fields, the rendered body, slug, published datetime, year,
    and url.
    """
    _post = {}
    with codecs.open(src_path, 'r', 'utf-8') as src:
        raw_title = src.readline().strip()
        smarty_title = smartypants.smartypants(raw_title)
        _post['title'] = flask.Markup(smarty_title)
        _post['title_first_char'] = first_alpha_char(raw_title).upper()
        # Discard the separator line under the title.
        _ = src.readline().strip()
        # Read "Key: value" metadata lines until the first blank line.
        while True:
            meta = src.readline().strip()
            if not meta:
                break
            if ':' in meta:
                meta_key, sep, meta_val = meta.partition(':')
                _post[meta_key.lower().strip()] = meta_val.strip()
        # Everything after the metadata is the markdown body.
        mkd = markdown.markdown(src.read())
        _post['body'] = smartypants.smartypants(mkd)
    _post['slug'] = get_slug_from_path(src_path)
    _post['published'] = parse_datetime(_post['published'])
    _post['year'] = _post.get('published').year
    _post['url'] = get_post_url(_post)
    return _post
def smart_dict(self):
    """Return self.dict() with every truthy value stripped and smartened.

    Values smartypants cannot handle (wrong type, bad encoding, no
    .strip method) are left untouched.
    """
    payload = self.dict()
    for key, value in payload.items():
        if not value:
            continue
        try:
            payload[key] = smartypants.smartypants(value.strip())
        except (TypeError, UnicodeError, AttributeError):
            # Non-text value: keep it as-is.
            pass
    return payload
def smartypants(text):
    """Applies smarty pants to curl quotes.

    >>> smartypants('The "Green" man')
    u'The “Green” man'
    """
    try:
        import smartypants
    except ImportError:
        logger.error("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
        return text
    return jinja2.Markup(smartypants.smartypants(text))
def smartypants(text):
    """Curl straight quotes in *text* using the smartypants library.

    >>> smartypants('The "Green" man')
    'The “Green” man'

    Raises TypogrifyError when the library is not installed.
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% smartypants %} filter: The Python smartypants library isn't installed.")
    return smartypants.smartypants(text)
def smartypants(text):
    """Applies smarty pants to curl quotes; falls back to the raw text
    when the smartypants library is missing.

    >>> smartypants('The "Green" man')
    u'The “Green” man'
    """
    try:
        import smartypants as _sp
    except ImportError:
        return text
    return _sp.smartypants(text)
def munge_feed(items):
    """Normalise a list of feed posts in place and return them.

    Smartens title/caption/description text, lower-cases and de-dupes
    categories, formats the publish date for display (plus an epoch
    timestamp), and builds the display label from the section, author and
    source/site fields.
    """
    print("++++++++++\nIn munge_feed module ...")
    for post in items:
        post['title_api'] = smartypants.smartypants(post['title_api'].strip())
        post['caption_api'] = smartypants.smartypants(
            post['caption_api'].strip())
        # Strip any "prefix||" from category names, lower-case, de-dupe.
        regex = re.compile(r"^.*\|\|", re.IGNORECASE)
        post['categories_api'] = list(
            set([regex.sub('', x.lower()) for x in post['categories_api']]))
        if post['sections_api']:
            post['sections_api'] = [x.lower() for x in post['sections_api']]
        post['desc_api'] = smartypants.smartypants(post['desc_api'].strip())
        # Collapse internal whitespace runs to single spaces.
        post['desc_api'] = " ".join(post['desc_api'].split())
        date_object = datetime.datetime.strptime(post['pubdate_api'],
                                                 '%Y-%m-%dT%H:%M:%S')
        post['timestamp'] = date_object.strftime('%b %d %I:%M %p')
        # Drop zero-padding and expand the abbreviated month names that
        # house style spells out.
        post['timestamp'] = post['timestamp'].replace(' 0', ' ').replace(
            'Jul', 'July').replace('Apr', 'April').replace('Mar', 'March').replace(
            'Jun', "June").replace(':00', '')
        post['timestamp_epoch'] = int(
            (date_object - datetime.datetime(1970, 1, 1)).total_seconds())
        if "opinion" in post['sections_api']:
            label_start = "OPINION"
        else:
            label_start = ""
        if post['source_api']:
            label_end = post['source_api']
        else:
            label_end = post['site_api']
        # Build "OPINION | Author | Source", keeping only non-empty pieces
        # (string multiplication by 0/1 drops the empty segments).
        post['label_api'] = (
            (label_start + ' | ') * str_len_check(label_start)) + (
            (post['author_api'] + ' | ') * str_len_check(post['author_api'])) + label_end
        # House-style publication name substitutions.
        post['label_api'] = post['label_api'].replace(
            "The Hamilton Spectator", "The Spec").replace(
            "Hamilton Spectator", "The Spec").replace(
            "Toronto Star", "The Star")
    return items
def parse_form(form_data, kind="list"):
    """Apply submitted form changes to the TinyDB asset database.

    form_data: multidict-style form payload.  For kind='list', values are
        "asset__field__value" strings (possibly lists of them).  For any
        other kind, the payload carries an asset_id plus per-field values
        for a single asset.
    kind: 'list' for the lineup page (many assets, integer fields only);
        anything else for the single-item page (one asset, mixed fields).
    """
    db = TinyDB(cfg.config['db_name'])
    Record = Query()
    print("incoming form data:")
    # print(form_data)
    # print("converted to a dict")
    print(dict(form_data))
    # form data will have keys, values that may be lists or a single string.
    form_data_dict = dict(form_data)
    if kind == 'list':
        # form data is coming from the 'lineup' page,
        # which can have multiple changes on multiple assets
        for k, v in form_data_dict.items():
            if k != "action":
                if isinstance(v, list):
                    # it's a list of strings.
                    for item in v:
                        # check if empty string
                        if item:
                            asset_id, field, new_value = item.split('__')
                            print(
                                f"++++++++\nSetting this item: {asset_id} to {field}: {new_value}\n++++++++"
                            )
                            db.update({field: int(new_value)},
                                      Record.asset_id == asset_id)
                else:
                    # check if empty string
                    if v:
                        asset_id, field, new_value = v.split('__')
                        print(
                            f"++++++++\nSetting this item: {asset_id} to {field}: {new_value}\n++++++++"
                        )
                        db.update({field: int(new_value)},
                                  Record.asset_id == asset_id)
    else:
        # form data is coming from the 'item' page instead,
        # mutiple changes possible but only 1 asset affected
        post_update = {}
        asset_id = form_data_dict['asset_id'][0]
        # Integer fields: skip empty strings, cast the rest.
        for x in ['draft_user', 'rank', 'rank_time']:
            if form_data_dict[x][0] != '':
                post_update[x] = int(form_data_dict[x][0])
        # Text fields: strip and smarten non-empty values.
        for x in ['label_user', 'title_user', 'desc_user']:
            if form_data_dict[x][0] != '':
                post_update[x] = smartypants.smartypants(
                    form_data_dict[x][0].strip())
        print("Data to update:")
        print(post_update)
        db.update(post_update, Record.asset_id == asset_id)
    db.close()
    return
def smarty_filter(s):
    """
    Filter to smartypants strings.

    Markup values are unescaped first so entities aren't double-escaped;
    the result is returned re-wrapped as Markup.
    """
    # BUG FIX: the original compared a type object to the string 'Markup'
    # (``type(s) == 'Markup'``), which is always False, so Markup values
    # were never unescaped.
    if isinstance(s, Markup):
        s = s.unescape()
    # Evaluate COPY elements (coerce to unicode; Python 2 only).
    if type(s) is not unicode:
        s = unicode(s)
    s = smartypants(s)
    return Markup(s)
def smartypants(text):
    """Applies smarty pants to curl quotes.

    >>> smartypants('The "Green" man')
    'The “Green” man'
    """
    try:
        import smartypants
    except ImportError:
        raise TypogrifyError("Error in {% smartypants %} filter: ")
    from smartypants import Attr
    # Everything in set1 except the dash conversions.
    attr = Attr.set1 & ~Attr.mask_d
    return smartypants.smartypants(text, attr)
def parse_transcript(path):
    """Parse a Premiere .tsv transcript export into a JSON subtitle file.

    Reads the UTF-16 encoded .tsv at ``path``, converts each row's
    HH:MM:SS:FF timecode to total seconds, and writes
    ``{'subtitles': [...]}`` to ``www/data/<basename>.json``.

    NOTE: Python 2 only (``tab_reader.next()``, ``BytesIO`` over the
    byte string produced by ``.encode()``).
    """
    data = {'subtitles': []}
    # Output basename comes from the input filename (breaks if the name
    # contains more than one dot).
    filename, ext = path.split('/')[-1].split('.')
    with codecs.open(path, 'rb', encoding='utf16') as f:
        transcript = f.read().encode('utf-8')
    tab_reader = csv.reader(BytesIO(transcript), delimiter='\t')
    headers = tab_reader.next()  # skip the header row
    for row in tab_reader:
        # Premiere exports kind of suck: some rows have an empty first
        # column, shifting text/timecode one column to the right.
        if row[0] == '':
            words = smartypants(row[1].strip())
            time_str = row[2]
        else:
            words = smartypants(row[0].strip())
            time_str = row[1]
        # Timecode split on ':' here; a sibling version of this function
        # splits on ';' -- confirm which separator the export uses.
        hours, minutes, seconds, frame = [
            int(x) for x in time_str.split(':')
        ]
        # assumes 24 fps footage -- TODO confirm
        decimal = (float(frame) / 24)
        total_seconds = (hours * 3600) + (minutes * 60) + (seconds + decimal)
        segment = {
            'time': total_seconds,
            'transcript': words,
        }
        data['subtitles'].append(segment)
    with open('www/data/%s.json' % filename, 'w') as wf:
        wf.write(json.dumps(data))