Ejemplo n.º 1
0
                # Extract the title from the next line
                title = next(infile)[7:]
                # Skip three metadata lines and the blank line separator
                next(infile); next(infile); next(infile); next(infile)
            elif (line[0] == "#"): # Handle linkposts
                title = line.split("](")[0][2:]
            else: # Handle original articles
                title = line
                next(infile)

            # Write opening HTML tags and title to output file
            outfile.write(f"{template[0]}\n<article>\n<h2>{title}</h2>\n")
            continue
        
        # Parse Markdown line into HTML
        html_line = md.html(line)

        # Strip out all HTML tags from line, to leave only content.
        text_line = unescape(sub(r"(\<[^\>]+\>)", "", html_line))
        
        # Increment the paragraph count for non-empty lines that do not start with "#"
        if (len(line) != 0): 
            if (line[0] != "#"):
                paragraph_count += 1

        # Tokenize paragraph by splitting into individual words.
        tokens = resplit(r"\W",text_line)

        # Extract the unique words in the paragraph.
        tokens_set = frozenset(tokens)