def extract_csv(head, separator):
    """
    csv preview lines => data frame => html

    Args:
        head - iterable of text lines taken from the start of a .csv file;
            the lines are joined with newlines before parsing
        separator - column delimiter handed to pandas.read_csv as `sep`

    Returns:
        html - html rendering of the parsed data frame
        info - metadata: 'note' is TRUNCATED; 'warnings' is the text pandas
            reported while skipping malformed lines (empty string when the
            first, strict parse succeeds)
    """
    import warnings  # local import so the block does not rely on a module-level one

    # this shouldn't balloon memory because head is limited in size by get_preview_lines
    text = '\n'.join(head)
    captured = io.StringIO()
    try:
        data = pandas.read_csv(io.StringIO(text), sep=separator)
    except pandas.errors.ParserError:
        # Strict parse failed: retry leniently, skipping malformed lines.
        # Older pandas writes the skip notices to stderr, newer releases are
        # understood to issue Python warnings instead, so capture both.
        with redirect_stderr(captured), warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            try:
                data = pandas.read_csv(
                    io.StringIO(text),
                    on_bad_lines='warn',
                    # sep=None is slower (doesn't use C), deduces the separator
                    sep=None,
                    engine='python')
            except TypeError:
                # pandas < 1.3 does not know on_bad_lines; use the legacy
                # keywords (which were removed again in pandas 2.0)
                data = pandas.read_csv(
                    io.StringIO(text),
                    error_bad_lines=False,
                    warn_bad_lines=True,
                    sep=None,
                    engine='python')
        for item in caught:
            captured.write(str(item.message).rstrip('\n') + '\n')

    html = remove_pandas_footer(data._repr_html_())  # pylint: disable=protected-access

    return html, {'note': TRUNCATED, 'warnings': captured.getvalue()}
def extract_excel(file_):
    """
    Render the first sheet of an Excel workbook as an html table.

    Args:
        file_ - file-like object opened in binary mode, pointing to XLS or XLSX

    Returns:
        html - html version of *first sheet only* in workbook
        info - metadata (always an empty dict here)
    """
    # sheet_name=0 selects the first sheet by position
    frame = pandas.read_excel(file_, sheet_name=0)
    raw_html = frame._repr_html_()  # pylint: disable=protected-access
    info = {}
    return remove_pandas_footer(raw_html), info