def handle_body(self, body_text):
    """Convert one blog entry into reST metadata and body.

    Argument:

        body_text: text string of one blog entry.

    Returns a (title, timestamp, categories, rested_body) tuple,
    or None when body_text is empty.
    """
    if not body_text:
        return None
    # Hyperlinks are normalized first so metadata parsing sees clean text.
    linked_text = convert.convert_hyperlink(body_text)
    # Metadata (timestamp, categories, title) lives on the first line only.
    first_line = linked_text.split('\n', 1)[0]
    timestamp, categories, title = convert.get_metadata(first_line)
    entry_body = convert.extract_entry_body(linked_text)
    rested_body = self.hatena2rest(entry_body)
    return title, timestamp, categories, rested_body
def hatena2rest(self, str_body):
    """Convert body text of a day entry to reST format.

    Argument:

        str_body: convert target string (Hatena-diary markup).

    Returns the converted reST string followed by collected footnotes.
    Mutates self.code_flag, self.ref_flag and self.table_flag while
    scanning, and resets self.code_flag at the end.

    NOTE: regex patterns are raw strings; the previous non-raw
    ``'\|'`` escapes are deprecated (SyntaxWarning on Python 3.12+).
    The escaped pattern values are unchanged.
    """
    footnotes = ''
    table = []
    tables = []
    merge_string = ''

    def parse_begin_ref(string_line):
        """Parse beginning of a reference (blockquote) block.

        Matches ``>>`` or ``>http://...>``; sets self.ref_flag and
        replaces the marker with the cite URL (or nothing).
        """
        pat_start_ref, match_obj = utils.regex_search(
            r'^>((http|https)://(.+?)|)>$', string_line)
        if match_obj:
            self.ref_flag = True
            if match_obj.group(1):
                repl_str = match_obj.group(1)
            else:
                repl_str = ''
            string_line = pat_start_ref.sub(
                repl_str, string_line)
        return string_line

    def parse_end_ref(string_line):
        """Parse ending of a reference block.

        ``<<`` closes the block; any other line is indented by four
        spaces to stay inside the reST blockquote.
        """
        pat_end_ref, match_obj = utils.regex_search(
            r'^<<', string_line)
        if match_obj:
            string_line = pat_end_ref.sub('\n\n', string_line)
            self.ref_flag = False
        else:
            string_line = re.sub('^', '    ', string_line)
        return string_line

    def parse_begin_codeblock(string_line):
        """Parse beginning of a code block.

        ``>|lexer|`` or ``>|`` opens a block; emits a reST
        ``.. code-block::`` directive (default lexer: sh).
        """
        pat_code_open, match_obj = utils.regex_search(
            r'>\|([a-zA-Z0-9]*)\|$|>\|()$', string_line)
        if match_obj:
            # code block opening
            self.code_flag = True
            if match_obj.group(1):
                lexer_str = convert.replace_lexer(match_obj.group(1))
                string_line = pat_code_open.sub(
                    '\n.. code-block:: ' + lexer_str + '\n', string_line)
            else:
                string_line = pat_code_open.sub(
                    '\n.. code-block:: sh\n', string_line)
        return string_line

    def parse_end_codeblock(string_line):
        """Parse ending of a code block.

        ``||<`` or ``|<`` closes the block; any other line is indented
        by four spaces to stay inside the directive body.
        """
        pat_code_close, match_obj = utils.regex_search(
            r'^\|\|<|^\|<$', string_line)
        if match_obj:
            string_line = pat_code_close.sub('\n', string_line)
            # code block closing
            self.code_flag = False
        else:
            string_line = re.sub('^', '    ', string_line)
        return string_line

    def extract_tables(string_line, table, tables):
        """Extract table rows.

        Argument:

            string_line: parsing target string.
            table: rows of the table currently being collected.
            tables: completed tables.

        A ``|cell|cell|`` line appends (raw_line, cells) to table;
        the first non-table line after a run of rows closes the table.
        """
        pat_table, match_obj = utils.regex_search(
            r'^\|(.+?)\|$', string_line)
        if match_obj:
            row_data = (match_obj.group(0),
                        match_obj.groups()[0].split('|'))
            if not self.table_flag:
                # table start
                self.table_flag = True
            table.append(row_data)
        else:
            if self.table_flag:
                # table close
                tables.append(table)
                table = []
                self.table_flag = False
        return table, tables

    if str_body:
        # str_line excludes '\n'
        for str_line in str_body.split('\n'):
            # convert hyperlink
            str_line = convert.convert_hyperlink(str_line)
            if self.code_flag:
                # inside a code block: only look for its end
                # (no inline conversion, so code content stays intact)
                str_line = parse_end_codeblock(str_line)
            else:
                str_line = parse_begin_codeblock(str_line)
                # replace '*' to '\*' of inline
                str_line = convert.replace_asterisk(str_line)
                # listing
                str_line = self.listing2rest(str_line)
                # convert shell var
                str_line = convert.replace_shell_variable(str_line)
                # section, subsection
                str_line = convert.section2rest(str_line)
                # convert image from hatena fotolife
                str_line = self.fotolife2rest(str_line)
                # convert footnote
                str_line, footnotes_ = convert.footnote2rest(str_line)
                if footnotes_:
                    footnotes += footnotes_ + '\n'
                # convert refs
                if self.ref_flag:
                    str_line = parse_end_ref(str_line)
                else:
                    str_line = parse_begin_ref(str_line)
                # extract table data
                table, tables = extract_tables(str_line, table, tables)
                # remove internal_link and convert blog parts
                str_line = self.convert_blog_parts(str_line)
            merge_string += utils.remove_element_entity(str_line) + '\n'
        # convert table
        merge_string = self.table2rest(tables, merge_string)
        self.code_flag = False
        return merge_string + '\n' + footnotes