def _convert_row(doc_date, xml):
    """
    Convert one table row into a (date, text) pair.

    The row may contain at most one 'th' node; its text is cleaned
    with _clean_date and parsed with read_date, using doc_date as
    the prefix. When there is no 'th' node, or the parsed date is
    falsy, doc_date is used instead.

    Returns

    * a (date, text) tuple, where text is the newline-joined text of
      the 'th' node (if any) followed by every 'td' node
    * None if that text is empty

    Raises Exception (after dumping the row) if more than one 'th'
    node is found.
    """
    headers = list(xml.iter('th'))
    cells = list(xml.iter('td'))

    # Guard: a row with several header cells is not something we handle
    if len(headers) > 1:
        ET.dump(xml)
        raise Exception("Did not expect more than one th node")

    row_date = None
    if headers:
        cleaned = _clean_date(headers[0].text or "")
        row_date = read_date(cleaned, prefix=doc_date, fuzzy=True)

    pieces = [_column_to_text(col) for col in headers + cells]
    text = "\n".join(pieces)
    if not text:
        return None
    return (row_date or doc_date), text
def _convert_row(doc_date, xml):
    """
    Convert one table row into a (date, text) pair.

    The row may contain at most one 'th' node; its text is cleaned
    with _clean_date and parsed with read_date, using doc_date as
    the prefix. When there is no 'th' node, or the parsed date is
    falsy, doc_date is used instead.

    Returns

    * a (date, text) tuple, where text is the newline-joined text of
      the 'th' node (if any) followed by every 'td' node
    * None if that text is empty

    Raises Exception (after dumping the row) if more than one 'th'
    node is found.
    """
    headers = list(xml.iter('th'))
    cells = list(xml.iter('td'))

    # Guard: a row with several header cells is not something we handle
    if len(headers) > 1:
        ET.dump(xml)
        raise Exception("Did not expect more than one th node")

    row_date = None
    if headers:
        cleaned = _clean_date(headers[0].text or "")
        row_date = read_date(cleaned, prefix=doc_date, fuzzy=True)

    pieces = [_column_to_text(col) for col in headers + cells]
    text = "\n".join(pieces)
    if not text:
        return None
    return (row_date or doc_date), text
def _convert_section(xml):
    """
    Convert every 'tr' row of a section via _convert_row and return
    the results as a list (one item per row, in document order).

    The section date handed to each row is the first non-None result
    of parsing the 'head' nodes with read_date, or None if there is
    no such result.

    (WARNING: mutates the tree -- the text of every 'br' node is
    replaced by a newline)
    """
    # Every head is parsed, even after a date has been found; only the
    # first non-None result is kept.
    section_date = None
    for head in xml.iter('head'):
        candidate = read_date(_clean_date(head.text))
        if section_date is None and candidate is not None:
            section_date = candidate

    for line_break in xml.iter('br'):
        line_break.text = "\n"

    rows = []
    for row in xml.iter('tr'):
        rows.append(_convert_row(section_date, row))
    return rows
def _convert_section(xml):
    """
    Convert every 'tr' row of a section via _convert_row and return
    the results as a list (one item per row, in document order).

    The section date handed to each row is the first non-None result
    of parsing the 'head' nodes with read_date, or None if there is
    no such result.

    (WARNING: mutates the tree -- the text of every 'br' node is
    replaced by a newline)
    """
    # Every head is parsed, even after a date has been found; only the
    # first non-None result is kept.
    section_date = None
    for head in xml.iter('head'):
        candidate = read_date(_clean_date(head.text))
        if section_date is None and candidate is not None:
            section_date = candidate

    for line_break in xml.iter('br'):
        line_break.text = "\n"

    rows = []
    for row in xml.iter('tr'):
        rows.append(_convert_row(section_date, row))
    return rows
Example #5
0
def _write_membrane(ntext, oprefix):
    """
    Write the text of a single membrane to disk, one file per line
    after the first.

    The first line of ntext is passed to _membrane_name; each
    following line goes to its own UTF-8 file named
    "<oprefix>-<membrane name>-<counter>", where the counter starts
    at 1 and is zero-padded to the width given by _digits. If the
    first three whitespace-separated words of a line parse as a date,
    that date is written on a line of its own before the entry.
    """
    parts = ntext.split("\n")
    membrane = _membrane_name(parts[0])
    entries = parts[1:]
    width = _digits(entries)

    for number, entry in enumerate(entries, start=1):
        leading_words = entry.split()[:3]
        try:
            stamp = read_date(" ".join(leading_words), fuzzy=True)
        except ValueError:
            # no usable date at the start of this entry
            stamp = None

        counter = str(number).zfill(width)
        target = "-".join([oprefix, membrane, counter])

        to_emit = [entry] if stamp is None else [stamp, entry]
        with codecs.open(target, 'w', 'utf-8') as out:
            for item in to_emit:
                print(item, file=out)
def _write_membrane(ntext, oprefix):
    """
    Write the text of a single membrane to disk, one file per line
    after the first.

    The first line of ntext is passed to _membrane_name; each
    following line goes to its own UTF-8 file named
    "<oprefix>-<membrane name>-<counter>", where the counter starts
    at 1 and is zero-padded to the width given by _digits. If the
    first three whitespace-separated words of a line parse as a date,
    that date is written on a line of its own before the entry.
    """
    parts = ntext.split("\n")
    membrane = _membrane_name(parts[0])
    entries = parts[1:]
    width = _digits(entries)

    for number, entry in enumerate(entries, start=1):
        leading_words = entry.split()[:3]
        try:
            stamp = read_date(" ".join(leading_words), fuzzy=True)
        except ValueError:
            # no usable date at the start of this entry
            stamp = None

        counter = str(number).zfill(width)
        target = "-".join([oprefix, membrane, counter])

        to_emit = [entry] if stamp is None else [stamp, entry]
        with codecs.open(target, 'w', 'utf-8') as out:
            for item in to_emit:
                print(item, file=out)
Example #7
0
 def assertDateEqual(self, expected, dstr, **kwargs):
     """Check that read_date(dstr, **kwargs) equals the expected value."""
     parsed = read_date(dstr, **kwargs)
     self.assertEqual(expected, parsed)
 def assertDateEqual(self, expected, dstr, **kwargs):
     """Check that read_date(dstr, **kwargs) equals the expected value."""
     parsed = read_date(dstr, **kwargs)
     self.assertEqual(expected, parsed)