import codecs
import os
import re

import utils


def __init__(self, path):
    # Read the post.
    with codecs.open(path, 'r', 'utf-8') as source:
        contents = source.read()
    front, body = contents.split('\n---\n')

    # Parse the metadata.
    self.info = {}
    for line in front.split('\n'):
        name, value = re.match(r'(\w+)\s*=\s*(.+)', line).groups()
        self.info[name] = value

    self.body = body

    # Parse the date from the path.
    self.year, self.month, self.date = re.search(
        r'(20\d\d)-(\d\d)-(\d\d)', path).groups()

    # Auto-generate a permalink if not given one.
    if 'permalink' not in self.info:
        self.info['permalink'] = utils.linkify(self.info['title'])

    basename = os.path.relpath(path, 'posts')
    self.basename = basename.split('.')[0]
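# A minimal, self-contained sketch of the front matter format the constructor
# above expects: "name = value" lines, a "---" separator on its own line, then
# the body. The sample contents are hypothetical.
import re

contents = 'title = An Example Post\ntags = python, blog\n---\nBody text.\n'

front, body = contents.split('\n---\n')
info = dict(re.match(r'(\w+)\s*=\s*(.+)', line).groups()
            for line in front.split('\n'))

assert info == {'title': 'An Example Post', 'tags': 'python, blog'}
assert body == 'Body text.\n'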
import re

import utils


def fix_file(path):
    with open(path, 'r') as source:
        content = source.read()

    # Find the post title and the permalink slug in its header link.
    m = re.search(
        r'\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com'
        r'/20\d\d/\d\d/\d\d/(\S+)/', content)

    # Report posts whose slugified title doesn't match the linked permalink.
    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        print(path)
        print(perma)
        print(m.group(2))
        print('---')
import re

import utils


def fix_file(path):
    with open(path, 'r') as source:
        content = source.read()

    # Find the post title and the permalink slug in its header link.
    m = re.search(
        r'\# \[([^\]]+)\]\(https://viviwilliam\.github\.io'
        r'/20\d\d/\d\d/\d\d/(\S+)/', content)

    # Report posts whose slugified title doesn't match the linked permalink.
    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        print(path)
        print(perma)
        print(m.group(2))
        print('---')
import os
import re

import utils


def fix_file(path):
    with open(path, 'r') as source:
        print(path)

        # The old file name is just the post's date.
        m = re.search(r'(.*)\.markdown', os.path.basename(path))
        date = m.group(1)

        content = source.read()

        # Slugify the title found in the post's metadata.
        m = re.search(r'title: "(.*)"', content)
        perm = utils.linkify(m.group(1))

    # Save the file back out under a "<date>-<slug>.md" name.
    with open('new/%s-%s.md' % (date, perm), 'w') as output:
        output.write(content)
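# A worked example of the rename above, on a hypothetical file: the date comes
# from the old file name and the slug from the 'title:' metadata line.
import os
import re

old_path = 'old/2011-04-22.markdown'
date = re.search(r'(.*)\.markdown', os.path.basename(old_path)).group(1)
title = re.search(r'title: "(.*)"', 'title: "Hello World"').group(1)

# Assuming utils.linkify slugifies "Hello World" to "hello-world", the post
# would be rewritten to: new/2011-04-22-hello-world.md
print('new/%s-%s.md' % (date, 'hello-world'))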
import re

import utils


def fix_file(path):
    with open(path, 'r') as source:
        content = source.read()

    # Parse the title and link.
    m = re.search(
        r'\# \[([^\]]+)\]\(https://viviwilliam\.github\.io'
        r'/20\d\d/\d\d/\d\d/(\S+)/', content)

    # Add the title.
    result = 'title = ' + m.group(1) + '\n'

    # Keep an explicit permalink only when it can't be derived from the title.
    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        result = result + 'permalink = ' + m.group(2) + '\n'
        print(path)
        print(perma)
        print(m.group(2))
        print('---')

    result = result + content

    # Save the file back out.
    with open(path, 'w') as output:
        output.write(result)
import re

import utils


def fix_file(path):
    with open(path, 'r') as source:
        content = source.read()

    # Parse the title and link.
    m = re.search(
        r'\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com'
        r'/20\d\d/\d\d/\d\d/(\S+)/', content)

    # Add the title.
    result = 'title = ' + m.group(1) + '\n'

    # Keep an explicit permalink only when it can't be derived from the title.
    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        result = result + 'permalink = ' + m.group(2) + '\n'
        print(path)
        print(perma)
        print(m.group(2))
        print('---')

    result = result + content

    # Save the file back out.
    with open(path, 'w') as output:
        output.write(result)
import re
import string
from typing import List

import utils


def to_html_quotebox(quote: str) -> str:
    """Generates HTML that shows a quote.

    Args:
        quote (str): Raw quote string, added through `.addquote`.

    Raises:
        BaseException: quote is empty.

    Returns:
        str: htmlbox.
    """
    if not quote:
        # This shouldn't happen because empty quotes are ignored by `.addquote`.
        raise BaseException("Trying to create quotebox for empty quote.")

    # Valid timestamp formats: [xx:xx], [xx:xx:xx]
    timestamp_regex = r"(\[\d{2}:\d{2}(?::\d{2})?\])"
    splitted = re.split(timestamp_regex, quote)

    # Return the quote unparsed if it has a custom format, i.e. one of these
    # conditions applies:
    # (1) Quote doesn't start with a timestamp.
    # (2) Quote only has timestamps.
    if splitted[0] or not any(part.lstrip() for part in splitted[::2]):
        return utils.linkify(quote)

    lines: List[str] = []
    for timestamp, phrase in zip(splitted[1::2], splitted[2::2]):
        # Wrap every line in a <div class="chat"></div> and if it is a regular
        # chat message format it accordingly.
        phrase = phrase.lstrip()
        if not phrase:
            # Timestamp with an empty phrase.
            # Append the timestamp to the previous phrase, it was probably
            # part of it.
            if not lines:
                lines.append(timestamp)
            else:
                lines[-1] += timestamp
        elif ": " in phrase and phrase[0] != "(":
            # phrase is a chat message.
            # Example: "[03:56] @Plat0: Hi"

            # userstring: Username, optionally preceded by its rank.
            # body: Content of the message sent by the user.
            userstring, body = phrase.split(": ", 1)

            # rank: Character rank or "" (not " ") in case of a regular user.
            # username: userstring variable stripped of the character rank.
            if userstring[0] not in string.ascii_letters + string.digits:
                rank = userstring[0]
                username = userstring[1:]
            else:
                rank = ""
                username = userstring

            # Escape special characters: needs to be done last.
            # Timestamp doesn't need to be escaped.
            rank = utils.html_escape(rank)
            username = utils.html_escape(username)
            body = utils.linkify(body)

            lines.append(f"<small>{timestamp} {rank}</small>"
                         f"<username>{username}:</username> "
                         f"<em>{body}</em>")
        else:
            # phrase is a PS message that may span over multiple lines.
            # Example: "[14:20:43] (plat0 forcibly ended a tournament.)"

            # Text contained within round parentheses is considered a separate
            # line. This is true for most use-cases but it's still a heuristic.
            sublines = re.split(r"(\(.*\))", phrase)
            sublines = [utils.linkify(s) for s in sublines if s.strip()]

            # The timestamp is written only on the first subline.
            sublines[0] = f"<small>{timestamp}</small> <em>{sublines[0]}</em>"
            lines += sublines

    # Merge lines.
    html = '<div class="message-log" style="display: inline-block">'
    for line in lines:
        html += f'<div class="chat">{line}</div>'
    html += "</div>"

    return html
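# A usage sketch for the quote format handled above. The exact HTML depends on
# utils.linkify and utils.html_escape, so only the call shape and the rough
# output structure are shown; the sample quote is made up.
quote = ("[03:56] @Plat0: Hi"
         "[03:57] +User2: check https://example.com"
         "[14:20:43] (plat0 forcibly ended a tournament.)")

html = to_html_quotebox(quote)
# html is a single <div class="message-log"> wrapper holding one
# <div class="chat"> per parsed line, with the rank and username escaped and
# any URLs in the message body linkified.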
import logging

from prawcore.exceptions import Forbidden  # assumed source; raised by PRAW on HTTP 403

import config
import utils


def analyzeUser(user, words=config.N_WORDS, comment=None, withLinks=False):
    logging.info(f"Analyzing user u/{user} for word(s): {', '.join(words)}")
    isNwords = words == config.N_WORDS

    recentComments = []
    try:
        recentComments = list(config.reddit.redditor(user).comments.new())
    except Forbidden as e:
        logging.info(
            f"Unauthorized to fetch recent comments, user was probably "
            f"suspended from Reddit: {e}")

    submissions = getUserPosts(user)
    comments = list(
        config.api.search_comments(author=user,
                                   filter=['body', 'id', 'permalink'],
                                   q="|".join(words),
                                   size=1000))
    logging.info(
        f"Found {len(comments)} comments for u/{user} from pushshift and "
        f"{len(recentComments)} recent comments.")

    totalMatches = 0
    totalNRMatches = 0
    links = []
    for s in submissions:
        # Always count the title; only count the selftext when present.
        count = countTextForWords(words, s.title) + (
            countTextForWords(words, s.selftext)
            if hasattr(s, 'selftext') else 0)
        totalMatches += count
        if withLinks and count > 0 and hasattr(s, 'permalink'):
            links.append(s.permalink)
        if isNwords:
            totalNRMatches += countTextForWords(words[2:], s.title) + (
                countTextForWords(words[2:], s.selftext)
                if hasattr(s, 'selftext') else 0)

    processedComments = []
    commentsWithoutLinks = []
    commentIds = []
    for c in (recentComments + comments):
        # Skip comments returned by both pushshift and the recent listing.
        if c.id in processedComments:
            continue
        processedComments.append(c.id)

        count = countTextForWords(words, c.body) if hasattr(c, 'body') else 0
        totalMatches += count
        if count > 0:
            commentIds.append(c.id)
            if withLinks and hasattr(c, 'permalink'):
                links.append(c.permalink)
        if isNwords:
            totalNRMatches += (countTextForWords(words[2:], c.body)
                               if hasattr(c, 'body') else 0)

    logging.info(
        f"Finished analyzing user u/{user}, results: "
        f"{totalMatches}, {totalNRMatches}")
    links = [utils.linkify(link) for link in links]
    return totalMatches, totalNRMatches, links, commentIds
import utils


# The (uri, expected_html) pairs are supplied by pytest parametrization in the
# original test module.
def test_linkify(uri: str, expected_html: str) -> None:
    assert utils.linkify(uri) == expected_html
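# A sketch of how such a test is typically driven by pytest parametrization.
# The expected HTML strings below are illustrative guesses, not the actual
# output format of utils.linkify.
import pytest

import utils


@pytest.mark.parametrize(
    "uri, expected_html",
    [
        ("plain text", "plain text"),  # assumed passthrough for non-URLs
        (
            "visit https://example.com",
            'visit <a href="https://example.com">https://example.com</a>',
        ),
    ],
)
def test_linkify_sketch(uri: str, expected_html: str) -> None:
    assert utils.linkify(uri) == expected_html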