def __init__(self, db_filename="moore_grants_database.xlsx",
             report_prefix="report",
             sheet_name="grants",
             index_column="url",
             report_only_new=True):
    '''
    Constructor.

    Loads the grants sheet of the Excel database into memory, first
    creating an empty database file when none exists at db_filename.

    db_filename     -- path of the Excel workbook used as the database
    report_prefix   -- prefix of the timestamped report file name
    sheet_name      -- name of the sheet that holds the grants
    index_column    -- column used as the dataframe index when loading
    report_only_new -- accepted for interface compatibility; this
                       constructor does not use it
    '''
    if not os.path.isfile(db_filename):
        # Generate a blank writable excel sheet from scratch: one sheet
        # whose only content is the Grant field names as column headers.
        blank_df = pd.DataFrame(columns=list(Grant.fields))
        # The context manager saves and closes the workbook exactly once,
        # including when to_excel raises (previously save() and close()
        # were both called, and neither ran on error).
        with ExcelWriter(db_filename) as writer:
            blank_df.to_excel(writer, sheet_name=sheet_name)

    # Timestamped name, e.g. "report_2020-01-31[23_59_59].xlsx": the first
    # 19 characters of str(datetime) are "YYYY-MM-DD HH:MM:SS".
    timestamp = str(datetime.today())[:19].replace(":", "_").replace(" ", "[")
    self.report_filename = report_prefix + "_" + timestamp + "].xlsx"
    # Kept for posterity, in case only the date component is needed and
    # we don't care about overwrites:
    # self.report_filename = report_prefix + "_" + str(date.today())

    self.db_filename = db_filename
    self.sheet_name = sheet_name
    self.dataframe = pd.read_excel(db_filename, sheet_name,
                                   index_col=index_column)
    self.usaved_sol_counter = 0
    self.added_counter = 0
    self.added_items = set()
    self.index_column = index_column
def __init__(self, db_filename="fbo_solicitations.xlsx",
             report_prefix="report",
             sol_sheet_name="solicitations",
             filtered_sheet_name="filtered_solicitations",
             index_column="sponsor_number",
             report_only_new=True):
    '''
    Constructor.

    Loads the solicitation and filtered-solicitation sheets of the Excel
    database into memory, first creating an empty database file when none
    exists at db_filename.

    db_filename         -- path of the Excel workbook used as the database
    report_prefix       -- prefix of the timestamped report file name
    sol_sheet_name      -- name of the sheet with the kept solicitations
    filtered_sheet_name -- name of the sheet with the filtered-out ones
    index_column        -- column used as the index of both dataframes
    report_only_new     -- accepted for interface compatibility; this
                           constructor does not use it
    '''
    if not os.path.isfile(db_filename):
        # Generate a blank writable excel sheet from scratch. Both sheets
        # get every Opportunity field except "filtered" as column headers.
        field_names = list(Opportunity.fields)
        field_names.remove("filtered")
        # The context manager saves and closes the workbook exactly once,
        # including when to_excel raises (previously save() and close()
        # were both called, and neither ran on error).
        with ExcelWriter(db_filename) as writer:
            pd.DataFrame(columns=field_names).to_excel(
                writer, sheet_name=sol_sheet_name)
            pd.DataFrame(columns=field_names).to_excel(
                writer, sheet_name=filtered_sheet_name)

    # Timestamped name, e.g. "report_2020-01-31[23_59_59].xlsx": the first
    # 19 characters of str(datetime) are "YYYY-MM-DD HH:MM:SS".
    timestamp = str(datetime.today())[:19].replace(":", "_").replace(" ", "[")
    self.report_filename = report_prefix + "_" + timestamp + "].xlsx"
    # Kept for posterity, in case only the date component is needed and
    # we don't care about overwrites:
    # self.report_filename = report_prefix + "_" + str(date.today())

    self.db_filename = db_filename
    self.sol_sheet_name = sol_sheet_name
    self.filtered_sheet_name = filtered_sheet_name
    self.sol_df = pd.read_excel(db_filename, sol_sheet_name,
                                index_col=index_column)
    self.filtered_df = pd.read_excel(db_filename, filtered_sheet_name,
                                     index_col=index_column)
    self.usaved_sol_counter = 0
    self.sol_counter = 0
    self.added_items = set()
def save_all(self):
    '''
    Dumps the in-memory dataframe to the database excel file, into the
    sheet named by self.sheet_name (cells are written unmerged).
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Saving {:s} ========".format(self.sheet_name))
    # The context manager saves and closes the workbook exactly once, even
    # if to_excel raises (previously save() and close() were both called).
    with ExcelWriter(self.db_filename) as writer:
        self.dataframe.to_excel(writer, sheet_name=self.sheet_name,
                                merge_cells=False)
    print("======== Done saving. ========\n")
def test_excel_date_datetime_format(self, engine, ext, path):
    # see gh-4133
    #
    # Excel output format strings
    #
    # Writes the same frame once with default formats and once with custom
    # date/datetime format strings, then checks both files read back equal.
    datetime_row = [
        datetime(1998, 5, 26, 23, 33, 4),
        datetime(2014, 2, 28, 13, 5, 13),
    ]
    df = DataFrame(
        [[date(2014, 1, 31), date(1999, 9, 24)], datetime_row],
        index=["DATE", "DATETIME"],
        columns=["X", "Y"],
    )
    df_expected = DataFrame(
        [[datetime(2014, 1, 31), datetime(1999, 9, 24)], datetime_row],
        index=["DATE", "DATETIME"],
        columns=["X", "Y"],
    )

    with tm.ensure_clean(ext) as filename2:
        # Context managers guarantee every writer/reader is closed even
        # when a write or an assertion fails, so no handle is left open
        # on the temporary files.
        with ExcelWriter(path) as writer1:
            df.to_excel(writer1, sheet_name="test1")

        with ExcelWriter(
            filename2,
            date_format="DD.MM.YYYY",
            datetime_format="DD.MM.YYYY HH-MM-SS",
        ) as writer2:
            df.to_excel(writer2, sheet_name="test1")

        with ExcelFile(path) as reader1:
            rs1 = pd.read_excel(reader1, "test1", index_col=0)

        with ExcelFile(filename2) as reader2:
            rs2 = pd.read_excel(reader2, "test1", index_col=0)

        tm.assert_frame_equal(rs1, rs2)

        # Since the reader returns a datetime object for dates,
        # we need to use df_expected to check the result.
        tm.assert_frame_equal(rs2, df_expected)
def save_all(self):
    '''
    Dumps all solicitations in both databases to an excel file, into two
    separate spreadsheets: one for filtered items, the other for the
    remaining (relevant) items
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Saving solicitations... ========")
    # The context manager saves and closes the workbook exactly once, even
    # if a to_excel call raises (previously save() and close() were both
    # called, and neither ran on error).
    with ExcelWriter(self.db_filename) as writer:
        self.sol_df.to_excel(writer, sheet_name=self.sol_sheet_name,
                             merge_cells=False)
        self.filtered_df.to_excel(writer,
                                  sheet_name=self.filtered_sheet_name,
                                  merge_cells=False)
    print("======== Done saving. ========\n")
def save_all(self):
    '''
    Dumps all solicitations in both databases to an excel file, into two
    separate spreadsheets: one for filtered items, the other for the
    remaining (relevant) items
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Saving solicitations... ========")
    sheets = (
        (self.sol_df, self.sol_sheet_name),
        (self.filtered_df, self.filtered_sheet_name),
    )
    # The context manager saves and closes the workbook exactly once, even
    # if a to_excel call raises (previously save() and close() were both
    # called, and neither ran on error).
    with ExcelWriter(self.db_filename) as writer:
        for frame, sheet in sheets:
            frame.to_excel(writer, sheet_name=sheet, merge_cells=False)
    print("======== Done saving. ========\n")
def generate_report(self):
    '''
    Generates a separate excel report containing only the rows of
    self.dataframe whose index value is in self.added_items. The report
    is written to self.report_filename, in a sheet named self.sheet_name.
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Generating report... ========")
    # Boolean mask: True for rows whose index value is in added_items.
    is_added = self.dataframe.index.isin(list(self.added_items))
    report_df = self.dataframe[is_added]
    # The context manager saves and closes the workbook exactly once, even
    # if to_excel raises (previously save() and close() were both called).
    with ExcelWriter(self.report_filename) as writer:
        report_df.to_excel(writer, sheet_name=self.sheet_name,
                           merge_cells=False)
    print("======== Report Generated as " + self.report_filename + " ========\n")
def test_excel_date_datetime_format(self):
    # Round-trips one frame through a default writer and a writer with
    # custom date/datetime format strings; both must read back the same.
    _skip_if_no_xlrd()

    labels = dict(index=['DATE', 'DATETIME'], columns=['X', 'Y'])
    datetime_row = [datetime(1998, 5, 26, 23, 33, 4),
                    datetime(2014, 2, 28, 13, 5, 13)]
    df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)],
                    datetime_row], **labels)
    df_expected = DataFrame([[datetime(2014, 1, 31), datetime(1999, 9, 24)],
                             datetime_row], **labels)

    with ensure_clean(self.ext) as filename1, \
            ensure_clean(self.ext) as filename2:
        plain_writer = ExcelWriter(filename1)
        formatted_writer = ExcelWriter(filename2,
                                       date_format='DD.MM.YYYY',
                                       datetime_format='DD.MM.YYYY HH-MM-SS')
        writers = (plain_writer, formatted_writer)

        for out in writers:
            df.to_excel(out, 'test1')
        for out in writers:
            out.close()

        plain_reader = ExcelFile(filename1)
        formatted_reader = ExcelFile(filename2)

        rs1 = plain_reader.parse('test1', index_col=None)
        rs2 = formatted_reader.parse('test1', index_col=None)

        tm.assert_frame_equal(rs1, rs2)

        # The reader hands back datetime objects for plain dates, hence
        # the comparison against df_expected rather than df.
        tm.assert_frame_equal(rs2, df_expected)
def generate_report(self):
    '''
    Generates a separate excel report from self.sol_df, keeping the rows
    whose proposal_due_date is on or after the current date and time, or
    is missing. A boolean "new" column marks rows whose index value is in
    self.added_items. The report is written to self.report_filename, in a
    sheet named self.sol_sheet_name.
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Generating report... ========")
    today = datetime.today()
    df = self.sol_df.copy()
    df["new"] = df.index.isin(list(self.added_items))

    due = df["proposal_due_date"]
    # Missing due dates must be detected with isnull(): an elementwise
    # "== None" comparison is False for every row, which silently dropped
    # all solicitations that have no due date.
    report_df = df[(due >= today) | due.isnull()]

    # The context manager saves and closes the workbook exactly once, even
    # if to_excel raises (previously save() and close() were both called).
    with ExcelWriter(self.report_filename) as writer:
        report_df.to_excel(writer, sheet_name=self.sol_sheet_name,
                           merge_cells=False)
    print("======== Report Generated as " + self.report_filename + " ========\n")
def generate_report(self):
    '''
    Generates a separate excel report, consisting of non-award-type
    notices that are not yet overdue (deadline today or later).

    Adds two columns to the copy that is written out: "new" (1 when the
    row's index value is in self.added_items, else 0) and "dd" (the
    deadline_date string parsed as month/day/year). The report is written
    to self.report_filename, in a sheet named self.sol_sheet_name.

    Raises whatever datetime.strptime raises when a deadline_date value
    is not a "%m/%d/%Y" string.
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Generating report... ========")
    # Parsed deadlines fall on midnight, so compare against the start of
    # today; comparing against the current time of day wrongly excluded
    # notices whose deadline is today.
    today = datetime.today().replace(hour=0, minute=0, second=0,
                                     microsecond=0)
    df = self.sol_df.copy()
    df["new"] = pd.Series([int(ix in self.added_items) for ix in df.index],
                          index=df.index)
    df["dd"] = [datetime.strptime(dt, "%m/%d/%Y")
                for dt in df["deadline_date"].values]

    not_overdue = df["dd"] >= today
    not_award = df["announcement_type"] != "Award"
    report_df = df[not_overdue & not_award]

    # The context manager saves and closes the workbook exactly once, even
    # if to_excel raises (previously save() and close() were both called).
    with ExcelWriter(self.report_filename) as writer:
        report_df.to_excel(writer, sheet_name=self.sol_sheet_name,
                           merge_cells=False)
    print("======== Report Generated as " + self.report_filename + " ========\n")
def __init__(self,
             db_filename="fbo_solicitations.xlsx",
             report_prefix="report",
             sol_sheet_name="solicitations",
             filtered_sheet_name="filtered_solicitations",
             index_column="sponsor_number",
             report_only_new=True):
    '''
    Constructor.

    Reads the solicitation sheet and the filtered-solicitation sheet of
    the Excel database into dataframes. When db_filename does not exist
    yet, an empty database with both sheets is created first.

    db_filename         -- path of the Excel workbook used as the database
    report_prefix       -- prefix of the timestamped report file name
    sol_sheet_name      -- name of the sheet with the kept solicitations
    filtered_sheet_name -- name of the sheet with the filtered-out ones
    index_column        -- column used as the index of both dataframes
    report_only_new     -- accepted for interface compatibility; this
                           constructor does not use it
    '''
    if not os.path.isfile(db_filename):
        # Generate a blank writable excel sheet from scratch. Both sheets
        # get every Opportunity field except "filtered" as column headers.
        field_names = list(Opportunity.fields)
        field_names.remove("filtered")
        empty_sheets = (sol_sheet_name, filtered_sheet_name)
        # The context manager saves and closes the workbook exactly once,
        # including when to_excel raises (previously save() and close()
        # were both called, and neither ran on error).
        with ExcelWriter(db_filename) as writer:
            for sheet in empty_sheets:
                pd.DataFrame(columns=field_names).to_excel(
                    writer, sheet_name=sheet)

    # Timestamped name, e.g. "report_2020-01-31[23_59_59].xlsx": the first
    # 19 characters of str(datetime) are "YYYY-MM-DD HH:MM:SS".
    timestamp = str(datetime.today())[:19].replace(":", "_").replace(" ", "[")
    self.report_filename = report_prefix + "_" + timestamp + "].xlsx"
    # Kept for posterity, in case only the date component is needed and
    # we don't care about overwrites:
    # self.report_filename = report_prefix + "_" + str(date.today())

    self.db_filename = db_filename
    self.sol_sheet_name = sol_sheet_name
    self.filtered_sheet_name = filtered_sheet_name
    self.sol_df = pd.read_excel(db_filename,
                                sol_sheet_name,
                                index_col=index_column)
    self.filtered_df = pd.read_excel(db_filename,
                                     filtered_sheet_name,
                                     index_col=index_column)
    self.usaved_sol_counter = 0
    self.sol_counter = 0
    self.added_items = set()
def test_excel_date_datetime_format(self, engine, ext):
    # see gh-4133
    #
    # Excel output format strings
    #
    # Writes the same frame once with default formats and once with custom
    # date/datetime format strings, then checks both files read back equal.
    datetime_row = [datetime(1998, 5, 26, 23, 33, 4),
                    datetime(2014, 2, 28, 13, 5, 13)]
    df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)],
                    datetime_row],
                   index=["DATE", "DATETIME"], columns=["X", "Y"])
    df_expected = DataFrame([[datetime(2014, 1, 31), datetime(1999, 9, 24)],
                             datetime_row],
                            index=["DATE", "DATETIME"], columns=["X", "Y"])

    with ensure_clean(ext) as filename2:
        # Context managers guarantee every writer/reader is closed even
        # when a write or an assertion fails, so no handle is left open
        # on the temporary files.
        with ExcelWriter(self.path) as writer1:
            df.to_excel(writer1, sheet_name="test1")

        with ExcelWriter(filename2,
                         date_format="DD.MM.YYYY",
                         datetime_format="DD.MM.YYYY HH-MM-SS") as writer2:
            df.to_excel(writer2, sheet_name="test1")

        with ExcelFile(self.path) as reader1:
            rs1 = pd.read_excel(reader1, "test1", index_col=0)

        with ExcelFile(filename2) as reader2:
            rs2 = pd.read_excel(reader2, "test1", index_col=0)

        tm.assert_frame_equal(rs1, rs2)

        # Since the reader returns a datetime object for dates,
        # we need to use df_expected to check the result.
        tm.assert_frame_equal(rs2, df_expected)
def test_excel_date_datetime_format(self):
    # Checks that custom date/datetime format strings on the writer do not
    # change what is read back compared with the default formats.
    _skip_if_no_xlrd()

    row_labels = ['DATE', 'DATETIME']
    col_labels = ['X', 'Y']
    timestamps = [datetime(1998, 5, 26, 23, 33, 4),
                  datetime(2014, 2, 28, 13, 5, 13)]

    df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], timestamps],
                   index=row_labels, columns=col_labels)
    df_expected = DataFrame(
        [[datetime(2014, 1, 31), datetime(1999, 9, 24)], timestamps],
        index=row_labels, columns=col_labels)

    custom_formats = {'date_format': 'DD.MM.YYYY',
                      'datetime_format': 'DD.MM.YYYY HH-MM-SS'}

    with ensure_clean(self.ext) as filename1:
        with ensure_clean(self.ext) as filename2:
            default_out = ExcelWriter(filename1)
            custom_out = ExcelWriter(filename2, **custom_formats)

            df.to_excel(default_out, 'test1')
            df.to_excel(custom_out, 'test1')

            default_out.close()
            custom_out.close()

            default_in = ExcelFile(filename1)
            custom_in = ExcelFile(filename2)

            rs1 = default_in.parse('test1', index_col=None)
            rs2 = custom_in.parse('test1', index_col=None)

            tm.assert_frame_equal(rs1, rs2)

            # Dates come back from the reader as datetime objects, so the
            # result is compared with df_expected instead of df.
            tm.assert_frame_equal(rs2, df_expected)
def __init__(self, db_filename="nsf_solicitations.xlsx",
             report_prefix="report",
             sol_sheet_name="solicitations",
             filtered_sheet_name="filtered_solicitations",
             index_column="pims_id"):
    '''
    Constructor.

    Loads the solicitation and filtered-solicitation sheets of the Excel
    database into memory, first creating an empty database file when none
    exists at db_filename. Also collects the values of the
    "solicitation_number" column of the solicitation sheet into a set.

    db_filename         -- path of the Excel workbook used as the database
    report_prefix       -- prefix of the timestamped report file name
    sol_sheet_name      -- name of the sheet with the kept solicitations
    filtered_sheet_name -- name of the sheet with the filtered-out ones
    index_column        -- column used as the index of both dataframes
    '''
    if not os.path.isfile(db_filename):
        # Generate a blank writable excel sheet from scratch. Both sheets
        # get every NsfSolicitation field except "filtered" as headers.
        field_names = list(NsfSolicitation.fields)
        field_names.remove("filtered")
        # The context manager saves and closes the workbook exactly once,
        # including when to_excel raises (previously save() and close()
        # were both called, and neither ran on error).
        with ExcelWriter(db_filename) as writer:
            pd.DataFrame(columns=field_names).to_excel(
                writer, sheet_name=sol_sheet_name)
            pd.DataFrame(columns=field_names).to_excel(
                writer, sheet_name=filtered_sheet_name)

    # Timestamped name, e.g. "report_2020-01-31[23_59_59].xlsx": the first
    # 19 characters of str(datetime) are "YYYY-MM-DD HH:MM:SS".
    timestamp = str(datetime.today())[:19].replace(":", "_").replace(" ", "[")
    self.report_filename = report_prefix + "_" + timestamp + "].xlsx"

    self.db_filename = db_filename
    self.sol_sheet_name = sol_sheet_name
    self.filtered_sheet_name = filtered_sheet_name
    self.sol_df = pd.read_excel(db_filename, sol_sheet_name,
                                index_col=index_column)
    self.filtered_df = pd.read_excel(db_filename, filtered_sheet_name,
                                     index_col=index_column)
    self.usaved_sol_counter = 0
    self.sol_counter = 0
    self.added_items = set()
    # Solicitation numbers already present in the loaded solicitation sheet.
    self.solicitation_numbers = set(
        self.sol_df["solicitation_number"].values)
    self.index_column = index_column
def generate_report(self):
    '''
    Generates a separate excel report, consisting of non-award-type
    notices that are not yet overdue (deadline today or later).

    The copy that is written out gains two columns: "new" (1 when the
    row's index value is in self.added_items, else 0) and "dd" (the
    deadline_date string parsed as month/day/year). The report goes to
    self.report_filename, in a sheet named self.sol_sheet_name.

    Raises whatever datetime.strptime raises when a deadline_date value
    is not a "%m/%d/%Y" string.
    '''
    # Single-argument print(...) emits the same text whether or not the
    # module uses the Python 3 print function.
    print("\n\n======== Generating report... ========")
    # Parsed deadlines fall on midnight, so the cutoff is the start of
    # today; using the current time of day wrongly excluded notices whose
    # deadline is today.
    today = datetime.today().replace(hour=0, minute=0, second=0,
                                     microsecond=0)
    df = self.sol_df.copy()
    df["new"] = pd.Series(
        [int(ix in self.added_items) for ix in df.index],
        index=df.index)
    df["dd"] = [
        datetime.strptime(dt, "%m/%d/%Y")
        for dt in df["deadline_date"].values
    ]

    keep = (df["dd"] >= today) & (df["announcement_type"] != "Award")
    report_df = df[keep]

    # The context manager saves and closes the workbook exactly once, even
    # if to_excel raises (previously save() and close() were both called).
    with ExcelWriter(self.report_filename) as writer:
        report_df.to_excel(writer, sheet_name=self.sol_sheet_name,
                           merge_cells=False)
    print("======== Report Generated as " + self.report_filename + " ========\n")
def write(
    self,
    writer,
    sheet_name="Sheet1",
    startrow=0,
    startcol=0,
    freeze_panes=None,
    engine=None,
    storage_options: StorageOptions = None,
):
    """
    writer : path-like, file-like, or ExcelWriter object
        File path or existing ExcelWriter
    sheet_name : string, default 'Sheet1'
        Name of sheet which will contain DataFrame
    startrow :
        upper left cell row to dump data frame
    startcol :
        upper left cell column to dump data frame
    freeze_panes : tuple of integer (length 2), default None
        Specifies the one-based bottommost row and rightmost column
        that is to be frozen
    engine : string, default None
        write engine to use if writer is a path - you can also set this
        via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
        and ``io.excel.xlsm.writer``.

        .. deprecated:: 1.2.0

            As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no
            longer maintained, the ``xlwt`` engine will be removed in a
            future version of pandas.

    {storage_options}

        .. versionadded:: 1.2.0
    """
    # NOTE(review): the storage_options placeholder in the docstring looks
    # like a template field filled in by a decorator outside this view -
    # confirm before adding any other braces to the docstring.
    from pandas.io.excel import ExcelWriter

    num_rows, num_cols = self.df.shape
    too_many_rows = num_rows > self.max_rows
    if too_many_rows or num_cols > self.max_cols:
        raise ValueError(
            f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} "
            f"Max sheet size is: {self.max_rows}, {self.max_cols}"
        )

    cells = self.get_formatted_cells()

    # A writer handed in by the caller stays open; one created here from a
    # path or buffer is ours to close once the cells are written.
    owns_writer = not isinstance(writer, ExcelWriter)
    if owns_writer:
        # pandas\io\formats\excel.py:808: error: Cannot instantiate
        # abstract class 'ExcelWriter' with abstract attributes 'engine',
        # 'save', 'supported_extensions' and 'write_cells'  [abstract]
        writer = ExcelWriter(  # type: ignore[abstract]
            writer, engine=engine, storage_options=storage_options
        )

    try:
        writer.write_cells(
            cells,
            sheet_name,
            startrow=startrow,
            startcol=startcol,
            freeze_panes=freeze_panes,
        )
    finally:
        # Release file handles opened by a writer created above, whether
        # or not writing succeeded.
        if owns_writer:
            writer.close()
def write(
    self,
    writer,
    sheet_name="Sheet1",
    startrow=0,
    startcol=0,
    freeze_panes=None,
    engine=None,
    storage_options: StorageOptions = None,
):
    """
    Write the formatted cells of ``self.df`` to one sheet.

    writer : path-like, file-like, or ExcelWriter object
        File path or existing ExcelWriter
    sheet_name : string, default 'Sheet1'
        Name of sheet which will contain DataFrame
    startrow :
        upper left cell row to dump data frame
    startcol :
        upper left cell column to dump data frame
    freeze_panes : tuple of integer (length 2), default None
        Specifies the one-based bottommost row and rightmost column
        that is to be frozen
    engine : string, default None
        write engine to use if writer is a path - you can also set this
        via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``,
        and ``io.excel.xlsm.writer``.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection,
        e.g. host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

        .. versionadded:: 1.2.0

    Raises ValueError when the frame has more rows than ``self.max_rows``
    or more columns than ``self.max_cols``.
    """
    from pandas.io.excel import ExcelWriter

    num_rows, num_cols = self.df.shape
    if num_rows > self.max_rows or num_cols > self.max_cols:
        size_part = (
            f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} "
        )
        limit_part = f"Max sheet size is: {self.max_rows}, {self.max_cols}"
        raise ValueError(size_part + limit_part)

    cells_to_write = self.get_formatted_cells()

    # Only a writer constructed here (from a path or buffer) is closed by
    # this method; a caller-supplied ExcelWriter is left open.
    created_here = False
    if not isinstance(writer, ExcelWriter):
        # pandas\io\formats\excel.py:808: error: Cannot instantiate
        # abstract class 'ExcelWriter' with abstract attributes 'engine',
        # 'save', 'supported_extensions' and 'write_cells'  [abstract]
        writer = ExcelWriter(  # type: ignore[abstract]
            writer, engine=engine, storage_options=storage_options
        )
        created_here = True

    placement = dict(
        startrow=startrow, startcol=startcol, freeze_panes=freeze_panes
    )
    try:
        writer.write_cells(cells_to_write, sheet_name, **placement)
    finally:
        # make sure to close opened file handles
        if created_here:
            writer.close()
standardized = get_val(row.standardized) encoding = get_val(row.encoding) if not downloaded and len(downloaded) < 1: downloaded, encoding = spider.start_single( row.uri, Path.joinpath(folder_download, downloaded)) task_content.loc[index, C.REQ_DOWNLOAD] = downloaded.name task_content.loc[index, C.REQ_ENCODING] = encoding else: downloaded = folder_download.joinpath(downloaded) if not standardized and len(standardized) < 1: standardized = std.parse_one(downloaded=downloaded, from_lang=src_language, to_lang=dst_language, encoding=encoding) task_content.loc[index, C.REQ_STANDARDIZED] = standardized.name except Exception as e: exc_type, exc_val, _ = sys.exc_info() if type(e) == r.exceptions.ConnectionError: lg.error(f'{dir(e)}') lg.error(f'errno:{e.errno} - strerror:{e.strerror}') task_content.loc[ index, C.REQ_ERROR] = f'{type(e)}:{e.errno}:{e.strerror}' lg.error(f"Exception occurred: {exc_type}\n", exc_info=True) ew = ExcelWriter(str(pth)) task_content.to_excel(ew, index=False, encoding='utf-8') ew.save() ew.close() lg.info(f'** finished file:{pth}')