예제 #1
0
 def get_table_header(self, soup_obj):
     """Return the cleaned column labels from the table's first header row.

     The ``th`` cells are looked up under ``soup_obj.thead.tr`` through
     ``clean_find_all`` and converted to text with ``get_full_string``.
     Each text is stripped; labels that end up empty or equal to ``"#"``
     are dropped.  Unless the first remaining label is
     ``"Document Title"``, ``"Row Number"`` is prepended.

     Raises:
         IndexError: if no label survives the filtering.
     """
     header_cells = clean_find_all(soup_obj.thead.tr, ["th"])
     raw_labels = [get_full_string(cell) for cell in header_cells]

     labels = []
     for raw in raw_labels:
         label = raw.strip()
         if label == "" or label == "#":
             # Blank cells and the "#" marker column carry no label.
             continue
         labels.append(label)

     if labels[0] == "Document Title":
         return labels
     return ["Row Number"] + labels
예제 #2
0
def get_report_tables(soup_obj, *args, **kwargs):
    """Wrap each report section in a ``DataTable`` and index them by title.

    Sections are looked up through ``clean_find_all`` with the spec
    ``['section', {'class': 'card mb-2'}]`` (presumably ``section`` elements
    carrying that class -- confirm against ``clean_find_all``).  Exactly 12
    must be found.  Extra positional and keyword arguments are forwarded
    unchanged to ``DataTable``.

    Returns:
        dict mapping each ``DataTable``'s ``title`` attribute to the
        ``DataTable`` itself; a later table with the same title replaces
        an earlier one.

    Raises:
        Exception: if the number of sections found is not 12.
    """
    sections = clean_find_all(soup_obj, ['section', {'class': 'card mb-2'}])
    found = len(sections)
    if found != 12:
        raise Exception(
            str(found) + " table headers found instead of 12!")

    data_tables = (DataTable(section, *args, **kwargs) for section in sections)
    return {data_table.title: data_table for data_table in data_tables}
예제 #3
0
def get_rows_in_table(table_obj):
    """Return the rows found in *table_obj*, or ``None`` when there are none.

    Rows are whatever ``clean_find_all`` yields for the spec
    ``['tr', {'class': 'nowrap'}]`` (presumably ``tr`` elements with class
    ``nowrap`` -- confirm against ``clean_find_all``).
    """
    matches = clean_find_all(table_obj, ['tr', {'class': 'nowrap'}])
    # An empty result is reported as None rather than an empty collection.
    return matches if len(matches) != 0 else None
예제 #4
0
def extract_columns(row_obj, colval="td"):
    """Extract every *colval* cell from a row.

    Cells are looked up through ``clean_find_all`` with the spec
    ``[colval]`` and ``.extract()`` is called on each one (presumably
    Beautiful Soup's ``extract``, which detaches the element from its
    tree and returns it -- confirm the element type).

    Args:
        row_obj: the row to search.
        colval: tag name of the cells to collect; defaults to ``"td"``.

    Returns:
        list of the values returned by each cell's ``.extract()``.

    Raises:
        AssertionError: if no matching cell is found.
    """
    cols = clean_find_all(row_obj, [colval])
    if len(cols) == 0:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so
        # existing callers see the same error.
        raise AssertionError(
            "no {!r} cells found in row".format(colval))
    return [col.extract() for col in cols]
예제 #5
0
def get_middleware_token(text):
    """Return the CSRF middleware token embedded in an HTML document.

    *text* is parsed with Beautiful Soup, ``clean_find_all`` is queried with
    the spec ``['input', {'name': 'csrfmiddlewaretoken'}]``, and the
    ``value`` attribute of the single match is returned.

    Args:
        text: HTML markup to search.

    Returns:
        The ``value`` attribute of the matched element.

    Raises:
        AssertionError: if the number of matches is not exactly one.
    """
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed (and warns), so results could differ
    # between environments.
    soup_obj = BeautifulSoup(text, "html.parser")
    token_inputs = clean_find_all(
        soup_obj, ['input', {'name': 'csrfmiddlewaretoken'}])
    if len(token_inputs) != 1:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            "expected exactly one csrfmiddlewaretoken input, found "
            + str(len(token_inputs)))
    return token_inputs[0]["value"]