Ejemplo n.º 1
0
    def __init__(self, path):
        """Load a post from ``path`` and parse its front matter.

        The file is read as UTF-8 and split at the first ``---`` line: the
        text before it is the front matter (one ``name = value`` pair per
        line), the text after it is the post body.

        Attributes set:
            info: dict of front-matter fields; ``permalink`` is filled in
                from ``utils.linkify(info['title'])`` when not given.
            body: everything after the first ``---`` line.
            year, month, date: strings captured from the first
                ``20YY-MM-DD`` pattern found in ``path``.
            basename: ``path`` relative to ``posts``, cut at the first ``.``.

        Raises:
            ValueError: if the file has no ``---`` separator line, if a
                non-blank front-matter line is not ``name = value``, or if
                ``path`` contains no ``20YY-MM-DD`` date.
            KeyError: if neither ``permalink`` nor ``title`` is present.
        """
        # Read the post
        with codecs.open(path, 'r', 'utf-8') as post_file:
            contents = post_file.read()

        # Split only at the first separator so that a '---' horizontal rule
        # inside the body stays part of the body instead of breaking the
        # two-way unpacking.
        front, body = contents.split('\n---\n', 1)

        # Parse the metadata
        self.info = {}
        for line in front.split('\n'):
            if not line.strip():
                # Blank lines carry no field; skip them instead of crashing.
                continue
            field = re.match(r'(\w+)\s*=\s*(.+)', line)
            if field is None:
                raise ValueError(
                    'Malformed front matter line in %s: %r' % (path, line))
            name, value = field.groups()
            self.info[name] = value

        self.body = body

        # Parse the date from the path
        date_match = re.search(r'(20\d\d)-(\d\d)-(\d\d)', path)
        if date_match is None:
            raise ValueError('No 20YY-MM-DD date found in path: %r' % (path,))
        self.year, self.month, self.date = date_match.groups()

        # Auto-generate permalink if not given one
        if 'permalink' not in self.info:
            self.info['permalink'] = utils.linkify(self.info['title'])

        basename = os.path.relpath(path, 'posts')
        self.basename = basename.split('.')[0]
Ejemplo n.º 2
0
def fix_file(path):
    """Report a post whose link slug differs from the one derived from its title.

    Reads the file at ``path`` and looks for a Markdown heading of the form
    ``# [Title](http://journal.stuffwithstuff.com/20YY/MM/DD/slug/``.  The
    slug is compared with ``utils.linkify(Title)``; when they differ, the
    path, the derived permalink, the existing slug and a ``---`` separator
    are printed.

    Files that contain no such heading have nothing to compare and are
    skipped without output.  The file is never modified.
    """
    with open(path, 'r') as post_file:
        content = post_file.read()

    m = re.search(
        r'\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/',
        content)
    if m is None:
        # No title link in this file; previously this crashed on m.group().
        return

    perma = utils.linkify(m.group(1))
    if perma != m.group(2):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(path)
        print(perma)
        print(m.group(2))
        print('---')
Ejemplo n.º 3
0
def fix_file(path):
    """Print a mismatch report for one post file.

    The file at ``path`` is searched for a Markdown heading shaped like
    ``# [Title](https://viviwilliam.github.io/20YY/MM/DD/slug/``.  If the
    slug in the link is not equal to ``utils.linkify(Title)``, four lines
    are printed: the path, the derived permalink, the slug found in the
    file, and ``---``.

    A file without such a heading is skipped silently; the file itself is
    only read, never written.
    """
    with open(path, 'r') as post_file:
        content = post_file.read()

    m = re.search(
        r'\# \[([^\]]+)\]\(https://viviwilliam\.github\.io/20\d\d/\d\d/\d\d/(\S+)/',
        content)
    if m is None:
        # Nothing to compare; avoids AttributeError on a missing match.
        return

    title, slug = m.group(1), m.group(2)
    perma = utils.linkify(title)
    if perma != slug:
        # print(x) with one argument is valid on both Python 2 and 3.
        print(path)
        print(perma)
        print(slug)
        print('---')
Ejemplo n.º 4
0
def fix_file(path):
    """Check one post's link slug against the slug derived from its title.

    Looks in the file at ``path`` for
    ``# [Title](http://journal.stuffwithstuff.com/20YY/MM/DD/slug/`` and
    prints the path, ``utils.linkify(Title)``, the slug from the link and a
    ``---`` line whenever the derived and existing slugs differ.

    Nothing is printed for matching slugs or for files that have no such
    heading.  The file is not modified.
    """
    with open(path, 'r') as post_file:
        content = post_file.read()

    m = re.search(
        r'\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/',
        content)
    if m is None:
        # The heading is absent: skip instead of failing on m.group().
        return

    perma = utils.linkify(m.group(1))
    existing = m.group(2)
    if perma == existing:
        return

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(path)
    print(perma)
    print(existing)
    print('---')
Ejemplo n.º 5
0
def fix_file(path):
    """Copy a ``*.markdown`` post to ``new/<stem>-<permalink>.md``.

    ``<stem>`` is the part of the file's base name before ``.markdown``
    (stored in ``date``; presumably a date string, not verified here) and
    ``<permalink>`` is ``utils.linkify`` applied to the text captured from
    the first ``title: "..."`` line in the file.  The content is written
    out unchanged.

    The path is always printed.  If the base name does not contain
    ``.markdown`` or the file has no ``title: "..."`` entry, a notice is
    printed and nothing is written.  The ``new`` directory must already
    exist.
    """
    with open(path, 'r') as post_file:
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(path)
        content = post_file.read()

    name_match = re.search(r'(.*)\.markdown', os.path.basename(path))
    if name_match is None:
        print('  skipped: file name does not end in .markdown')
        return
    date = name_match.group(1)

    title_match = re.search('title: "(.*)"', content)
    if title_match is None:
        print('  skipped: no title found')
        return
    perm = utils.linkify(title_match.group(1))

    # Save the file back out
    with open('new/%s-%s.md' % (date, perm), 'w') as output:
        output.write(content)
Ejemplo n.º 6
0
def fix_file(path):
    """Write a copy of a ``*.markdown`` post under a permalink-based name.

    The output file is ``new/<stem>-<permalink>.md`` where ``<stem>`` is
    whatever precedes ``.markdown`` in the base name of ``path`` (held in
    ``date``; assumed to be the post date -- confirm against the file
    layout) and ``<permalink>`` is ``utils.linkify`` of the text inside the
    first ``title: "..."`` match.  File content is copied verbatim.

    ``path`` is printed for every call.  When the name or the title cannot
    be matched, a short notice is printed and no file is written.  The
    ``new`` directory is not created by this function.
    """
    with open(path, 'r') as source:
        # print(x) with a single argument runs on Python 2 and Python 3.
        print(path)
        content = source.read()

    stem_match = re.search(r'(.*)\.markdown', os.path.basename(path))
    title_match = re.search('title: "(.*)"', content)

    # Either pattern may be absent; dereferencing None used to raise
    # AttributeError and abort the whole run.
    if stem_match is None:
        print('  skipped: file name does not end in .markdown')
        return
    if title_match is None:
        print('  skipped: no title found')
        return

    date = stem_match.group(1)
    perm = utils.linkify(title_match.group(1))

    # Save the file back out
    with open('new/%s-%s.md' % (date, perm), 'w') as output:
        output.write(content)
Ejemplo n.º 7
0
def fix_file(path):
    """Prepend ``title``/``permalink`` metadata lines to a post, in place.

    The file at ``path`` is searched for a heading of the form
    ``# [Title](https://viviwilliam.github.io/20YY/MM/DD/slug/``.  A
    ``title = Title`` line is prepended to the file; if ``slug`` differs
    from ``utils.linkify(Title)``, a ``permalink = slug`` line is added
    after it and the mismatch (path, derived permalink, slug, ``---``) is
    printed.

    If the heading is not found, a notice is printed and the file is left
    untouched.  The metadata is prepended on every call, so running this
    twice on the same file adds the lines twice.
    """
    # Finish reading and close the file before it is reopened for writing.
    with open(path, 'r') as post_file:
        content = post_file.read()

    # Parse the title and link
    m = re.search(
        r'\# \[([^\]]+)\]\(https://viviwilliam\.github\.io/20\d\d/\d\d/\d\d/(\S+)/',
        content)
    if m is None:
        print('%s: no title link found, left unchanged' % path)
        return

    title, slug = m.group(1), m.group(2)

    # Add the title
    result = 'title = ' + title + '\n'

    perma = utils.linkify(title)
    if perma != slug:
        # Keep the existing slug explicitly, since it cannot be regenerated
        # from the title.
        result = result + 'permalink = ' + slug + '\n'
        print(path)
        print(perma)
        print(slug)
        print('---')

    result = result + content

    # Save the file back out
    with open(path, 'w') as output:
        output.write(result)
Ejemplo n.º 8
0
def fix_file(path):
    """Rewrite a post so it starts with ``title`` (and maybe ``permalink``).

    Searches the file at ``path`` for
    ``# [Title](http://journal.stuffwithstuff.com/20YY/MM/DD/slug/``, then
    writes the file back with ``title = Title`` prepended.  When ``slug``
    is not what ``utils.linkify(Title)`` produces, ``permalink = slug`` is
    prepended as a second line and the path, derived permalink, slug and
    ``---`` are printed.

    When the heading is missing, a notice is printed and the file is not
    modified.  The function is not idempotent: each call prepends the
    metadata lines again.
    """
    # Read everything and release the handle before writing to the same path.
    with open(path, 'r') as post_file:
        content = post_file.read()

    # Parse the title and link
    m = re.search(
        r'\# \[([^\]]+)\]\(http://journal\.stuffwithstuff\.com/20\d\d/\d\d/\d\d/(\S+)/',
        content)
    if m is None:
        # Previously this crashed with AttributeError on m.group().
        print('%s: no title link found, left unchanged' % path)
        return

    title = m.group(1)
    slug = m.group(2)

    # Add the title
    header = 'title = ' + title + '\n'

    perma = utils.linkify(title)
    if perma != slug:
        # The slug in the link is authoritative; record it explicitly.
        header += 'permalink = ' + slug + '\n'
        print(path)
        print(perma)
        print(slug)
        print('---')

    # Save the file back out
    with open(path, 'w') as output:
        output.write(header + content)
Ejemplo n.º 9
0
def to_html_quotebox(quote: str) -> str:
    """Generates HTML that shows a quote.

    The quote is split on timestamps (``[xx:xx]`` or ``[xx:xx:xx]``). Each
    timestamped phrase becomes one or more ``<div class="chat">`` lines inside
    a ``<div class="message-log">`` wrapper. Quotes that do not start with a
    timestamp, or that contain nothing but timestamps, are returned through
    ``utils.linkify`` without further parsing.

    Args:
        quote (str): Raw quote string, added through `.addquote`.

    Raises:
        ValueError: quote is empty. (Previously a bare ``BaseException``;
            ``ValueError`` is still caught by ``except BaseException`` but no
            longer slips past ``except Exception`` handlers.)

    Returns:
        str: htmlbox.
    """
    if not quote:
        # This shouldn't happen because empty quotes are ignored by `.addquote`.
        raise ValueError("Trying to create quotebox for empty quote.")

    # Valid timestamp formats: [xx:xx], [xx:xx:xx]
    timestamp_regex = r"(\[\d{2}:\d{2}(?::\d{2})?\])"
    # Because the pattern has a capturing group, re.split keeps the timestamps:
    # even indexes hold text, odd indexes hold the timestamps.
    splitted = re.split(timestamp_regex, quote)

    # Return the quote unparsed if it has a custom format, aka one of these conditions
    # applies:
    # (1) Quote doesn't start with a timestamp.
    # (2) Quote only has timestamps.
    if splitted[0] or not any(part.lstrip() for part in splitted[::2]):
        return utils.linkify(quote)

    rank_free_chars = string.ascii_letters + string.digits

    lines: List[str] = []
    for timestamp, phrase in zip(splitted[1::2], splitted[2::2]):
        # Wrap every line in a <div class="chat"></div> and if it is a regular chat
        # message format it accordingly.

        phrase = phrase.lstrip()
        if not phrase:
            # Timestamp with an empty phrase.
            # Append the timestamp to the previous phrase, it was probably part of it.
            if lines:
                lines[-1] += timestamp
            else:
                lines.append(timestamp)
        elif ": " in phrase and phrase[0] != "(":
            # phrase is a chat message.
            # Example: "[03:56] @Plat0: Hi"

            # userstring: Username, optionally preceded by its rank.
            # body: Content of the message sent by the user.
            userstring, body = phrase.split(": ", 1)

            # rank: Character rank or "" (not " ") in case of a regular user.
            # username: userstring variable stripped of the character rank.
            # userstring is empty when the phrase itself starts with ": ";
            # guard the index so that case no longer raises IndexError.
            if userstring and userstring[0] not in rank_free_chars:
                rank = userstring[0]
                username = userstring[1:]
            else:
                rank = ""
                username = userstring

            # Escape special characters: needs to be done last.
            # Timestamp doesn't need to be escaped.
            rank = utils.html_escape(rank)
            username = utils.html_escape(username)
            body = utils.linkify(body)

            lines.append(f"<small>{timestamp} {rank}</small>"
                         f"<username>{username}:</username> "
                         f"<em>{body}</em>")
        else:
            # phrase is a PS message that may span over multiple lines.
            # Example: "[14:20:43] (plat0 forcibly ended a tournament.)"

            # Text contained within round parentheses is considered a separated line.
            # This is true for most use-cases but it's still heuristic.
            sublines = re.split(r"(\(.*\))", phrase)
            sublines = [utils.linkify(s) for s in sublines if s.strip()]

            # The timestamp is written only on the first subline.
            sublines[0] = f"<small>{timestamp}</small> <em>{sublines[0]}</em>"
            lines += sublines

    # Merge lines
    chat_divs = "".join(f'<div class="chat">{line}</div>' for line in lines)
    return ('<div class="message-log" style="display: inline-block">'
            f"{chat_divs}</div>")
Ejemplo n.º 10
0
def analyzeUser(user, words=config.N_WORDS, comment=None, withLinks=False):
    """Count occurrences of ``words`` in a Reddit user's posts and comments.

    Sources searched:
      * submissions returned by ``getUserPosts(user)`` (title + selftext),
      * the user's newest comments from ``config.reddit``,
      * up to 1000 comments from ``config.api.search_comments`` matching any
        of ``words``.
    Comments appearing in both comment sources are counted once (by ``id``).

    Args:
        user: Reddit username, without the ``u/`` prefix.
        words: Words to count; defaults to ``config.N_WORDS``.
        comment: Not used by this function.
        withLinks: When true, collect permalinks of matching items.

    Returns:
        A tuple ``(totalMatches, totalNRMatches, links, commentIds)``:
        ``totalMatches`` counts ``words``; ``totalNRMatches`` counts
        ``words[2:]`` and is only computed when ``words == config.N_WORDS``
        (otherwise 0); ``links`` are the collected permalinks passed through
        ``utils.linkify``; ``commentIds`` are ids of comments with a match.
    """
    logging.info(f"Analyzing user u/{user} for word(s): {', '.join(words)}")

    isNwords = words == config.N_WORDS
    recentComments = []
    try:
        recentComments = list(config.reddit.redditor(user).comments.new())
    except Forbidden as e:
        # Best effort: carry on with the pushshift results only.
        logging.info(
            f"Unauthorized to fetch recent comments, user was probably suspended from Reddit: {e}"
        )

    submissions = getUserPosts(user)
    comments = list(
        config.api.search_comments(author=user,
                                   filter=['body', 'id', 'permalink'],
                                   q="|".join(words),
                                   size=1000))

    logging.info(
        f"Found {len(comments)} comments for u/{user} from pushshift and {len(recentComments)} recent comments."
    )

    totalMatches = 0
    totalNRMatches = 0
    links = []
    for s in submissions:
        # NOTE(review): a submission without `selftext` contributes 0, so its
        # title is not counted either. That is how the original conditional
        # expression parsed ((title + selftext) if ... else 0); confirm that
        # skipping the title is intended.
        hasSelftext = hasattr(s, 'selftext')
        if hasSelftext:
            count = (countTextForWords(words, s.title) +
                     countTextForWords(words, s.selftext))
        else:
            count = 0
        totalMatches += count
        if withLinks and count > 0 and hasattr(s, 'permalink'):
            links.append(s.permalink)
        if isNwords and hasSelftext:
            totalNRMatches += (countTextForWords(words[2:], s.title) +
                               countTextForWords(words[2:], s.selftext))

    # A set gives O(1) duplicate checks; the same comment can be returned by
    # both the recent-comments listing and the pushshift search.
    seenCommentIds = set()
    commentIds = []
    for c in (recentComments + comments):
        if c.id in seenCommentIds:
            continue
        seenCommentIds.add(c.id)

        hasBody = hasattr(c, 'body')
        count = countTextForWords(words, c.body) if hasBody else 0
        totalMatches += count
        if count > 0:
            commentIds.append(c.id)
            if withLinks and hasattr(c, 'permalink'):
                links.append(c.permalink)
        if isNwords and hasBody:
            totalNRMatches += countTextForWords(words[2:], c.body)

    logging.info(
        f"Finished analyzing user u/{user}, results: {totalMatches}, {totalNRMatches}"
    )

    links = [utils.linkify(link) for link in links]

    return totalMatches, totalNRMatches, links, commentIds
Ejemplo n.º 11
0
def test_linkify(uri: str, expected_html: str) -> None:
    """Check that ``utils.linkify(uri)`` produces ``expected_html``."""
    actual_html = utils.linkify(uri)
    assert actual_html == expected_html