Esempio n. 1
0
def extract_csv(head, separator):
    """
    csv preview lines => data frame => html
    Args:
        head - iterable of text lines taken from the start of the .csv;
            the lines are joined with newlines before parsing
        separator - column delimiter used for the first parse attempt
    Returns:
        html - html rendering of the parsed data frame
        info - metadata: 'note' is TRUNCATED; 'warnings' is whatever was
            written to stderr during the lenient fallback parse (an empty
            string when the first parse succeeds)
    """
    warnings_ = io.StringIO()
    # this shouldn't balloon memory because head is limited in size by get_preview_lines
    text = '\n'.join(head)
    try:
        data = pandas.read_csv(io.StringIO(text), sep=separator)
    except pandas.errors.ParserError:
        # Strict parse failed: retry leniently, skipping malformed rows.
        # sep=None makes pandas deduce the separator; only the (slower)
        # python engine supports that, so request it explicitly rather than
        # relying on pandas' automatic engine fallback, whose own warning
        # would otherwise be mixed into the captured output.
        lenient = {'sep': None, 'engine': 'python'}
        # temporarily redirect stderr to capture warnings (usually errors)
        with redirect_stderr(warnings_):
            try:
                data = pandas.read_csv(
                    io.StringIO(text), on_bad_lines='warn', **lenient)
            except TypeError:
                # pandas < 1.3 has no on_bad_lines; use the legacy flags
                # (deprecated in 1.3, removed in 2.0).
                data = pandas.read_csv(
                    io.StringIO(text),
                    error_bad_lines=False,
                    warn_bad_lines=True,
                    **lenient)

    html = remove_pandas_footer(data._repr_html_())  # pylint: disable=protected-access

    return html, {'note': TRUNCATED, 'warnings': warnings_.getvalue()}
Esempio n. 2
0
def extract_excel(file_):
    """
    Render the first sheet of an Excel workbook as html.

    Args:
        file_ - file-like object opened in binary mode, pointing to XLS or XLSX
    Returns:
        html - html version of *first sheet only* in workbook, with the
            pandas footer removed
        info - metadata (always an empty dict here)
    """
    # sheet_name=0 selects only the first sheet
    frame = pandas.read_excel(file_, sheet_name=0)
    rendered = frame._repr_html_()  # pylint: disable=protected-access
    return remove_pandas_footer(rendered), {}