def _check_extension(self, ext):
    # Round-trip self.frame through an Excel file with the given extension,
    # exercising to_excel options (cols, header, index, na_rep) and checking
    # that read_excel recovers the original frame each time.
    path = "__tmp_to_excel_from_excel__." + ext
    with ensure_clean(path) as path:
        # Inject NaNs so the na_rep round-trips below are meaningful.
        self.frame["A"][:5] = nan

        self.frame.to_excel(path, "test1")
        self.frame.to_excel(path, "test1", cols=["A", "B"])
        self.frame.to_excel(path, "test1", header=False)
        self.frame.to_excel(path, "test1", index=False)

        # test roundtrip
        self.frame.to_excel(path, "test1")
        recons = read_excel(path, "test1", index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, "test1", index=False)
        recons = read_excel(path, "test1", index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, "test1", na_rep="NA")
        recons = read_excel(path, "test1", index_col=0, na_values=["NA"])
        tm.assert_frame_equal(self.frame, recons)

        # GH 3611: numeric na_rep must also be recognised on read,
        # both as string and as numeric na_values.
        self.frame.to_excel(path, "test1", na_rep="88")
        recons = read_excel(path, "test1", index_col=0, na_values=["88"])
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, "test1", na_rep="88")
        recons = read_excel(path, "test1", index_col=0, na_values=[88, 88.0])
        tm.assert_frame_equal(self.frame, recons)
def test_roundtrip(self):
    # Write self.frame with several to_excel options and verify read_excel
    # reconstructs it, including NA representations (GH 3611).
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_from_excel__.' + ext

    with ensure_clean(path) as path:
        # Inject NaNs so the na_rep round-trips below are meaningful.
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test roundtrip
        self.frame.to_excel(path, 'test1')
        recons = read_excel(path, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', index=False)
        recons = read_excel(path, 'test1', index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='NA')
        recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
        tm.assert_frame_equal(self.frame, recons)

        # GH 3611: numeric na_rep read back via string or numeric na_values.
        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=['88'])
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=[88, 88.0])
        tm.assert_frame_equal(self.frame, recons)
def test_roundtrip(self):
    # Write self.frame with several to_excel options and verify read_excel
    # reconstructs it, including NA representations (GH 3611).
    _skip_if_no_xlrd()

    with ensure_clean(self.ext) as path:
        # Inject NaNs so the na_rep round-trips below are meaningful.
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test roundtrip
        self.frame.to_excel(path, 'test1')
        recons = read_excel(path, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', index=False)
        recons = read_excel(path, 'test1', index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='NA')
        recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
        tm.assert_frame_equal(self.frame, recons)

        # GH 3611: numeric na_rep read back via string or numeric na_values.
        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=['88'])
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=[88, 88.0])
        tm.assert_frame_equal(self.frame, recons)
def _check_extension(self, ext):
    # Round-trip self.frame through an Excel file with the given extension,
    # exercising to_excel options (cols, header, index, na_rep) and checking
    # that read_excel recovers the original frame each time.
    path = '__tmp_to_excel_from_excel__.' + ext
    with ensure_clean(path) as path:
        # Inject NaNs so the na_rep round-trips below are meaningful.
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test roundtrip
        self.frame.to_excel(path, 'test1')
        recons = read_excel(path, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', index=False)
        recons = read_excel(path, 'test1', index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='NA')
        recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
        tm.assert_frame_equal(self.frame, recons)

        # GH 3611: numeric na_rep read back via string or numeric na_values.
        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=['88'])
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='88')
        recons = read_excel(path, 'test1', index_col=0, na_values=[88, 88.0])
        tm.assert_frame_equal(self.frame, recons)
def test_read_from_http_url(self):
    """Reading an xlsx over HTTP must give the same frame as the local copy."""
    _skip_if_no_xlrd()

    # Read the reference copy shipped with the test data first.
    local_path = os.path.join(tm.get_data_path(), 'test.xlsx')
    expected = read_excel(local_path)

    remote = ('https://raw.github.com/pydata/pandas/master/'
              'pandas/io/tests/data/test.xlsx')
    fetched = read_excel(remote)

    tm.assert_frame_equal(fetched, expected)
def main():
    # Entry point: read every sheet of the playbook workbook, push each row
    # onto the shared device queue, and wait for worker threads to drain it.
    global default_user
    global default_pass
    global default_secret
    global arguments
    # read and parse arguments from command line
    arguments = getargs()
    # device_queue.maxsize(arguments.qs)
    print('Setting max Queue size to: ', arguments.qs)
    device_queue.maxsize = int(arguments.qs)
    worksheets = {}
    # Credentials are collected once up front and shared by all workers.
    default_user = getusername()
    default_pass = getpassword(default_user)
    default_secret = getpassword('enable/secret')
    # Initializes the threads.
    CreateThreads(arguments.ts)
    with excel.ExcelFile(arguments.inputfile) as wb:
        for sname in wb.sheet_names:
            print('**** Sheet Name: '+ str(sname))
            # Force the credential/command columns to str so empty cells and
            # numeric-looking values don't change type downstream.
            readsheet = excel.read_excel(wb,sheet_name=sname,converters={'Username':str,'Password':str,'Secret':str,'data_type':str,'Show_Commands':str,'Config_Commands':str})
            df = DataFrame(data=readsheet, copy=True)
            worksheets[sname] = df.to_dict(orient='records')
            for rw in worksheets[sname]:
                device_queue.put(rw)
    # Block until every queued row has been processed by a worker.
    device_queue.join()
    print(threading.enumerate())
    print('Playbook completed successfully!!')
def readTARAxls(datafile, headerline):
    """Load a TARA spreadsheet, normalise its header, and index by SampleID.

    Rows labelled 'none' are dropped from the result.
    """
    table = read_excel(datafile, header=headerline)
    # The real column names live in the third data row.
    table.columns = _fix_header(table.iloc[2])
    table = table.drop(range(4))
    renamed = table.rename(
        columns={'Sample ID': 'TARA_SampleID', 'Sample ID.2': 'SampleID'})
    renamed = renamed.drop(['Sample ID.1', 'PARAMETER'], axis=1)
    indexed = renamed.set_index('SampleID')
    keep = indexed.index != 'none'
    return indexed.loc[keep]
def main(workshop, mailchimpdir):
    # Build a CSV of workshop attendee emails, excluding any address that
    # already appears in the MailChimp CSV exports under mailchimpdir.
    if not os.path.isdir(mailchimpdir):
        print(mailchimpdir, 'must be a directory.')
        return
    wdict = {}
    # Only the first three columns (surname, first name, email) are needed.
    wdf = ex.read_excel(workshop, 'Sheet1', parse_cols=(0, 1, 2))
    wdf.rename(columns={
        'Surname': 'surname',
        'First name': 'first_name',
        'Email address': 'email_address'
    }, inplace=True)
    wdf.dropna(subset=['email_address'], inplace=True)
    wdf.fillna('', inplace=True)
    # Normalise whitespace in every column before keying on the email.
    for c in wdf.columns:
        wdf[c] = wdf[c].str.strip()
    for row in wdf.itertuples():
        wdict[row.email_address] = make_entry(row)
    # print(wdict)
    # Remove addresses that already exist in any MailChimp export.
    for filename in os.listdir(mailchimpdir):
        if not filename.endswith('.csv'):
            continue
        prune(wdict, mailchimpdir, filename)
    print('Workshop emails:', len(wdict))
    outdir = os.path.dirname(workshop)
    outdir = os.path.join(outdir, 'results')
    os.makedirs(outdir, exist_ok=True)
    # Timestamped output name so repeated runs never overwrite each other.
    outfilename = _starttime.strftime("workshop_emails_%Y%m%d-%H%M%S.csv")
    outpath = os.path.join(outdir, outfilename)
    with open(outpath, 'w', newline='') as outfile:
        outwriter = csv.writer(outfile)
        outwriter.writerow('email surname firstname'.split())
        for k in sorted(wdict):
            outwriter.writerow(wdict[k])
def test_read_from_file_url(self):
    """file:// URLs should read identically to plain local paths."""
    _skip_if_no_xlrd()

    # FILE
    if sys.version_info[:2] < (2, 6):
        raise nose.SkipTest("file:// not supported with Python < 2.6")

    local_path = os.path.join(tm.get_data_path(), 'test.xlsx')
    expected = read_excel(local_path)

    try:
        via_url = read_excel('file://localhost/' + local_path)
    except URLError:
        # fails on some systems
        raise nose.SkipTest("failing on %s" %
                            ' '.join(platform.uname()).strip())

    tm.assert_frame_equal(via_url, expected)
def test_reader_seconds(self):
    # Test reading times with and without milliseconds. GH5945.
    _skip_if_no_xlrd()
    import xlrd

    if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
        # Xlrd >= 0.9.3 can handle Excel milliseconds.
        expected = DataFrame.from_items([("Time", [
            time(1, 2, 3),
            time(2, 45, 56, 100000),
            time(4, 29, 49, 200000),
            time(6, 13, 42, 300000),
            time(7, 57, 35, 400000),
            time(9, 41, 28, 500000),
            time(11, 25, 21, 600000),
            time(13, 9, 14, 700000),
            time(14, 53, 7, 800000),
            time(16, 37, 0, 900000),
            time(18, 20, 54)
        ])])
    else:
        # Xlrd < 0.9.3 rounds Excel milliseconds.
        expected = DataFrame.from_items([("Time", [
            time(1, 2, 3),
            time(2, 45, 56),
            time(4, 29, 49),
            time(6, 13, 42),
            time(7, 57, 35),
            time(9, 41, 29),
            time(11, 25, 22),
            time(13, 9, 15),
            time(14, 53, 8),
            time(16, 37, 1),
            time(18, 20, 54)
        ])])

    # Both Excel epochs (1900 and 1904) must yield the same times.
    epoch_1900 = os.path.join(self.dirpath, 'times_1900.xls')
    epoch_1904 = os.path.join(self.dirpath, 'times_1904.xls')

    actual = read_excel(epoch_1900, 'Sheet1')
    tm.assert_frame_equal(actual, expected)
    actual = read_excel(epoch_1904, 'Sheet1')
    tm.assert_frame_equal(actual, expected)
def test_reader_seconds(self):
    # Test reading times with and without milliseconds. GH5945.
    _skip_if_no_xlrd()
    import xlrd

    if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
        # Xlrd >= 0.9.3 can handle Excel milliseconds.
        expected = DataFrame.from_items([("Time",
                                          [time(1, 2, 3),
                                           time(2, 45, 56, 100000),
                                           time(4, 29, 49, 200000),
                                           time(6, 13, 42, 300000),
                                           time(7, 57, 35, 400000),
                                           time(9, 41, 28, 500000),
                                           time(11, 25, 21, 600000),
                                           time(13, 9, 14, 700000),
                                           time(14, 53, 7, 800000),
                                           time(16, 37, 0, 900000),
                                           time(18, 20, 54)])])
    else:
        # Xlrd < 0.9.3 rounds Excel milliseconds.
        expected = DataFrame.from_items([("Time",
                                          [time(1, 2, 3),
                                           time(2, 45, 56),
                                           time(4, 29, 49),
                                           time(6, 13, 42),
                                           time(7, 57, 35),
                                           time(9, 41, 29),
                                           time(11, 25, 22),
                                           time(13, 9, 15),
                                           time(14, 53, 8),
                                           time(16, 37, 1),
                                           time(18, 20, 54)])])

    # Both Excel epochs (1900 and 1904) must yield the same times.
    epoch_1900 = os.path.join(self.dirpath, 'times_1900.xls')
    epoch_1904 = os.path.join(self.dirpath, 'times_1904.xls')

    actual = read_excel(epoch_1900, 'Sheet1')
    tm.assert_frame_equal(actual, expected)
    actual = read_excel(epoch_1904, 'Sheet1')
    tm.assert_frame_equal(actual, expected)
def main():
    """Plot cartoon counts per year from WORKBOOK as a bar chart.

    Saves the figure both as BARPLOT and as a JPEG next to it.
    """
    frame = ex.read_excel(WORKBOOK, 'Sheet1')
    frame.dropna(inplace=True)
    frame['Year'] = frame.ISODate.dt.year.astype('int')
    per_year = frame.groupby('Year').count()
    axes = per_year.ISODate.plot(kind='bar', legend=False)
    axes.get_figure().savefig(BARPLOT)
    # Re-save the rendered plot as a JPEG alongside the original format.
    Image.open(BARPLOT).save(BARPLOT[:-4] + '.jpg', 'JPEG')
    print('Exit plot.')
def init_piaofang_history_data(self):
    """Load historical box-office data, index it by days since release,
    and keep the per-day maxima in self.piaofang_h.

    Returns None without touching self.piaofang_h if the file is unreadable.
    """
    try:
        sheet = read_excel(self.piaofang_history_fname)
    except IOError as err:
        print(err)
        return None
    dates = [stamp.date() for stamp in sheet['发布时间']]
    sheet.index = diff_date(self.pub_date, dates)
    trimmed = sheet.drop(['内部编号', '发布时间', '收集时间', '更新时间'], axis=1)
    self.piaofang_h = trimmed.groupby(level=0).max()
def init_gewara_data(self):
    """Load Gewara data, index it by days since release, and keep the
    per-day maxima in self.gewara_f.

    Returns None without touching self.gewara_f if the file is unreadable.
    """
    try:
        sheet = read_excel(self.gewara_fname)
    except IOError as err:
        print(err)
        return None
    dates = [stamp.date() for stamp in sheet['收集时间']]
    sheet.index = diff_date(self.pub_date, dates)
    trimmed = sheet.drop(['内部编号', '格瓦拉编号', '收集时间', '更新时间'], axis=1)
    self.gewara_f = trimmed.groupby(level=0).max()
def init_douban_comment(self):
    """Load Douban comment data and index it by days since release.

    Unlike the sibling loaders, rows are NOT aggregated per day here.
    Returns None without touching self.douban_f if the file is unreadable.
    """
    try:
        sheet = read_excel(self.douban_fname)
    except IOError as err:
        print(err)
        return None
    dates = np.array([stamp.date() for stamp in sheet['收集时间']])
    sheet.index = diff_date(self.pub_date, dates)
    self.douban_f = sheet.drop(['内部编号', '豆瓣编号', '收集时间', '更新时间'], axis=1)
def init_baidu_index(self):
    """Load Baidu index data, index it by days since release, and keep the
    per-day maxima in self.baidu_f.

    Returns None without touching self.baidu_f if the file is unreadable.
    """
    try:
        temp = read_excel(self.baidu_fname)
    except IOError as e:
        # BUG FIX: the handler previously bound the exception as ``msg`` but
        # printed ``e``, so any read failure raised NameError instead of
        # being reported.  Bind as ``e`` like the sibling loaders.
        print(e)
        return None
    ds = [x.date() for x in temp['收集时间']]
    temp.index = diff_date(self.pub_date, ds)
    self.baidu_f = temp.drop(['内部编号','搜索类型','搜索排名', '搜索趋势','收集时间','更新时间'], axis=1).groupby(level=0).max()
def test_to_excel_output_encoding(self):
    """Non-ASCII values, index labels and column labels must survive a
    write/read round-trip with utf8 encoding."""
    _skip_if_no_xlrd()

    frame = DataFrame([[u('\u0192'), u('\u0193'), u('\u0194')],
                       [u('\u0195'), u('\u0196'), u('\u0197')]],
                      index=[u('A\u0192'), 'B'],
                      columns=[u('X\u0193'), 'Y', 'Z'])

    target = '__tmp_to_excel_float_format__.' + self.ext
    with ensure_clean(target) as target:
        frame.to_excel(target, sheet_name='TestSheet', encoding='utf8')
        roundtripped = read_excel(target, 'TestSheet', encoding='utf8')
        tm.assert_frame_equal(roundtripped, frame)
def test_swapped_columns(self):
    """Writing with a reordered cols list keeps values with their labels.

    Test for issue #5427.
    """
    _skip_if_no_xlrd()
    with ensure_clean(self.ext) as path:
        frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]})
        # Write columns in the reverse of their natural order.
        frame.to_excel(path, 'test1', cols=['B', 'A'])

        result = read_excel(path, 'test1', header=0)
        tm.assert_series_equal(frame['A'], result['A'])
        tm.assert_series_equal(frame['B'], result['B'])
def getbasemetadata():
    """Assemble per-run TARA sample metadata and pickle it.

    Joins the ENA sample sheet with the TARA sequencing-context spreadsheet,
    derives local fastq/bam paths from the 'submitted_ftp' column, renames
    the descriptive columns, and stores the result indexed by
    (SampleID, RunID) at Biodata.TARA.metadataDF.

    Returns the array of experiment accessions for the retained rows.
    """
    df = read_excel(Biodata.TARA.SeqContextXL, header=16)
    # Real header lives a few rows in; drop the leading junk rows.
    df = df.drop(range(4)).rename(columns={'Analysis ID.1':'experiment_accession'})\
        .set_index('experiment_accession')
    sample_sheet = read_csv(RawFastq.TARA.SampleSheet, sep='\t').set_index('experiment_accession')
    df = sample_sheet.join(df)
    df['RunID'] = df['run_accession']
    df['SampleID'] = df['secondary_sample_accession']
    # Keep only runs that actually have fastq submissions.
    df = df[df['submitted_ftp'].apply(lambda x: '.fastq.gz' in x)]
    # 'submitted_ftp' holds the two mate files separated by ';'.
    df['Fastq_1'] = df['submitted_ftp'].apply(
        lambda x: join(RawFastq.TARA.FastqDir, basename(x.split(';')[0])))
    # BUG FIX: Fastq_2 previously reused index [0], which made it identical
    # to Fastq_1; the second mate is entry [1], as the ICRABAM_2 line shows.
    df['Fastq_2'] = df['submitted_ftp'].apply(
        lambda x: join(RawFastq.TARA.FastqDir, basename(x.split(';')[1])))
    df['ICRABAM_1'] = df['submitted_ftp'].apply(lambda x: join(
        Mapping.OM_RGC.MapDir, basename(x.split(';')[0].replace('.fastq.gz', '.icra.bam'))))
    df['ICRABAM_2'] = df['submitted_ftp'].apply(lambda x: join(
        Mapping.OM_RGC.MapDir, basename(x.split(';')[1].replace('.fastq.gz', '.icra.bam'))))
    df = df.rename(
        columns={
            'Sample ID': 'TARA_SampleID',
            'Analysis label': 'Analysis_label',
            'Environmental feature': 'Environmental_feature',
            'Size fraction, lower threshold': 'Size_min',
            'Size fraction, upper threshold': 'Size_max',
            'Depth, top/min': 'Depth_min',
            'Depth, bottom/max': 'Depth_max',
            'Event label': 'Event_label',
            'Station label': 'Station_label'
        })
    # Event labels embed the sampling timestamp: <event>_YYYYMMDDTHHMMZ.
    df['Collection_datetime'] = df['Event_label'].dropna()\
        .apply(lambda x: datetime.strptime(x.split('_')[1], '%Y%m%dT%H%MZ'))
    stations = read_csv(Biodata.TARA.Stations, sep='\t')
    df['Latitude'] = df['Station_label'].dropna()\
        .apply(lambda x: stations[stations['Station'] == x]['Latitude'].values[0])
    df['Longitude'] = df['Station_label'].dropna()\
        .apply(lambda x: stations[stations['Station'] == x]['Longitude'].values[0])
    # Representative depth is the midpoint of the sampled depth range.
    df['Depth'] = df[['Depth_min', 'Depth_max']].astype(float).mean(1)
    df['Cruise_series'] = 'TARA'
    df = df[[
        'SampleID', 'RunID', 'Cruise_series', 'Collection_datetime',
        'Latitude', 'Longitude', 'Depth', 'TARA_SampleID', 'Analysis_label',
        'Event_label', 'Station_label', 'Environmental_feature',
        'Size_min', 'Size_max', 'Depth_min', 'Depth_max',
        'Fastq_1', 'Fastq_2', 'ICRABAM_1', 'ICRABAM_2'
    ]]
    df = df.reset_index().set_index(['SampleID', 'RunID'])
    df.to_pickle(Biodata.TARA.metadataDF)
    return df['experiment_accession'].values
def test_int_types(self):
    # np.int* frames must round-trip through Excel as ints by default and
    # as floats when convert_float=False.
    _skip_if_no_xlrd()

    for np_type in (np.int8, np.int16, np.int32, np.int64):
        with ensure_clean(self.ext) as path:
            # Test np.int values read come back as int (rather than float
            # which is Excel's format).
            frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                              dtype=np_type)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = reader.parse('test1')
            int_frame = frame.astype(int)
            tm.assert_frame_equal(int_frame, recons)
            recons2 = read_excel(path, 'test1')
            tm.assert_frame_equal(int_frame, recons2)

            # test with convert_float=False comes back as float
            float_frame = frame.astype(float)
            recons = read_excel(path, 'test1', convert_float=False)
            tm.assert_frame_equal(recons, float_frame)
def test_int_types(self):
    """np.int* frames round-trip through Excel as int64 by default, and as
    float when convert_float=False (Excel stores all numbers as floats)."""
    _skip_if_no_xlrd()
    for np_type in (np.int8, np.int16, np.int32, np.int64):
        with ensure_clean(self.ext) as path:
            frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                              dtype=np_type)
            frame.to_excel(path, 'test1')

            parsed = ExcelFile(path).parse('test1')
            as_int64 = frame.astype(np.int64)
            tm.assert_frame_equal(as_int64, parsed)
            tm.assert_frame_equal(as_int64, read_excel(path, 'test1'))

            # Without float->int coercion the values come back as floats.
            unconverted = read_excel(path, 'test1', convert_float=False)
            tm.assert_frame_equal(unconverted, frame.astype(float))
def test_swapped_columns(self):
    """Writing with a reordered columns list keeps values with their labels.

    Test for issue #5427.
    """
    _skip_if_no_xlrd()
    with ensure_clean(self.ext) as path:
        frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]})
        # Write columns in the reverse of their natural order.
        frame.to_excel(path, 'test1', columns=['B', 'A'])

        result = read_excel(path, 'test1', header=0)
        tm.assert_series_equal(frame['A'], result['A'])
        tm.assert_series_equal(frame['B'], result['B'])
def test_to_excel_output_encoding(self):
    # Non-ASCII cell values, index labels and column labels must survive a
    # write/read round-trip with utf8 encoding.
    _skip_if_no_xlrd()
    ext = self.ext
    filename = '__tmp_to_excel_float_format__.' + ext
    df = DataFrame(
        [[u('\u0192'), u('\u0193'), u('\u0194')],
         [u('\u0195'), u('\u0196'), u('\u0197')]],
        index=[u('A\u0192'), 'B'],
        columns=[u('X\u0193'), 'Y', 'Z'])

    with ensure_clean(filename) as filename:
        df.to_excel(filename, sheet_name='TestSheet', encoding='utf8')
        result = read_excel(filename, 'TestSheet', encoding='utf8')
        tm.assert_frame_equal(result, df)
def create_df(self): if self.file_path.endswith('.csv'): df = pd.read_csv(self.file_path) else: wb = xlrd.open_workbook(self.file_path) sheet = wb.sheets()[0] df = read_excel(self.file_path,sheet.name) df_no_nan = df.where((pd.notnull(df)), None) return df_no_nan
def test_duplicated_columns(self):
    """A frame with duplicate column labels must round-trip intact.

    Test for issue #5235.
    """
    _skip_if_no_xlrd()
    with ensure_clean(self.ext) as path:
        frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
        labels = ['A', 'B', 'B']
        frame.columns = labels
        frame.to_excel(path, 'test1')

        result = read_excel(path, 'test1')
        # The reader de-duplicates labels on read; restore them to compare.
        result.columns = labels
        tm.assert_frame_equal(frame, result)
def _check_extension(self, ext):
    # Round-trip self.frame through an Excel file with the given extension,
    # exercising to_excel options and the NA representation round-trip.
    path = '__tmp_to_excel_from_excel__.' + ext
    with ensure_clean(path) as path:
        # Inject NaNs so the na_rep round-trip below is meaningful.
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # test roundtrip
        self.frame.to_excel(path, 'test1')
        recons = read_excel(path, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', index=False)
        recons = read_excel(path, 'test1', index_col=None)
        recons.index = self.frame.index
        tm.assert_frame_equal(self.frame, recons)

        self.frame.to_excel(path, 'test1', na_rep='NA')
        recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
        tm.assert_frame_equal(self.frame, recons)
def test_reader_special_dtypes(self):
    # Type inference on mixed columns, the convert_float switch, index_col
    # by position and by name, and per-column converters.
    _skip_if_no_xlrd()

    expected = DataFrame.from_items([
        ("IntCol", [1, 2, -3, 4, 0]),
        ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
        ("BoolCol", [True, False, True, True, False]),
        ("StrCol", [1, 2, 3, 4, 5]),
        # GH5394 - this is why convert_float isn't vectorized
        ("Str2Col", ["a", 3, "c", "d", "e"]),
        ("DateCol", [
            datetime(2013, 10, 30), datetime(2013, 10, 31),
            datetime(1905, 1, 1), datetime(2013, 12, 14),
            datetime(2015, 3, 14)
        ])
    ])

    xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
    xls_path = os.path.join(self.dirpath, 'test_types.xls')

    # should read in correctly and infer types
    for path in (xls_path, xlsx_path):
        actual = read_excel(path, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

    # if not coercing number, then int comes in as float
    float_expected = expected.copy()
    float_expected["IntCol"] = float_expected["IntCol"].astype(float)
    float_expected.loc[1, "Str2Col"] = 3.0
    for path in (xls_path, xlsx_path):
        actual = read_excel(path, 'Sheet1', convert_float=False)
        tm.assert_frame_equal(actual, float_expected)

    # check setting Index (assuming xls and xlsx are the same here)
    for icol, name in enumerate(expected.columns):
        actual = read_excel(xlsx_path, 'Sheet1', index_col=icol)
        actual2 = read_excel(xlsx_path, 'Sheet1', index_col=name)
        exp = expected.set_index(name)
        tm.assert_frame_equal(actual, exp)
        tm.assert_frame_equal(actual2, exp)

    # convert_float and converters should be different but both accepted
    expected["StrCol"] = expected["StrCol"].apply(str)
    actual = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str})
    tm.assert_frame_equal(actual, expected)

    no_convert_float = float_expected.copy()
    no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
    actual = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str},
                        convert_float=False)
    tm.assert_frame_equal(actual, no_convert_float)
def test_duplicated_columns(self):
    # Test for issue #5235: duplicate column labels must round-trip.
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_duplicated_columns__.' + ext

    with ensure_clean(path) as path:
        write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
        colnames = ['A', 'B', 'B']
        write_frame.columns = colnames
        write_frame.to_excel(path, 'test1')

        # Cast back to int64 for comparison; restore the duplicated labels
        # that the reader de-duplicates on read.
        read_frame = read_excel(path, 'test1').astype(np.int64)
        read_frame.columns = colnames
        tm.assert_frame_equal(write_frame, read_frame)
def test_read_xlrd_Book(self):
    """An already-open xlrd Book must be accepted by ExcelFile and read_excel."""
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()
    import xlrd

    frame = self.frame
    with ensure_clean('.xls') as pth:
        frame.to_excel(pth, "SheetA")
        book = xlrd.open_workbook(pth)

        with ExcelFile(book, engine="xlrd") as xl:
            tm.assert_frame_equal(frame, xl.parse("SheetA"))

        via_read = read_excel(book, sheetname="SheetA", engine="xlrd")
        tm.assert_frame_equal(frame, via_read)
def test_reader_special_dtypes(self):
    # Type inference on mixed columns, the convert_float switch, index_col
    # by position and by name, and per-column converters.
    _skip_if_no_xlrd()

    expected = DataFrame.from_items([
        ("IntCol", [1, 2, -3, 4, 0]),
        ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
        ("BoolCol", [True, False, True, True, False]),
        ("StrCol", [1, 2, 3, 4, 5]),
        # GH5394 - this is why convert_float isn't vectorized
        ("Str2Col", ["a", 3, "c", "d", "e"]),
        ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31),
                     datetime(1905, 1, 1), datetime(2013, 12, 14),
                     datetime(2015, 3, 14)])
    ])

    xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
    xls_path = os.path.join(self.dirpath, 'test_types.xls')

    # should read in correctly and infer types
    for path in (xls_path, xlsx_path):
        actual = read_excel(path, 'Sheet1')
        tm.assert_frame_equal(actual, expected)

    # if not coercing number, then int comes in as float
    float_expected = expected.copy()
    float_expected["IntCol"] = float_expected["IntCol"].astype(float)
    float_expected.loc[1, "Str2Col"] = 3.0
    for path in (xls_path, xlsx_path):
        actual = read_excel(path, 'Sheet1', convert_float=False)
        tm.assert_frame_equal(actual, float_expected)

    # check setting Index (assuming xls and xlsx are the same here)
    for icol, name in enumerate(expected.columns):
        actual = read_excel(xlsx_path, 'Sheet1', index_col=icol)
        actual2 = read_excel(xlsx_path, 'Sheet1', index_col=name)
        exp = expected.set_index(name)
        tm.assert_frame_equal(actual, exp)
        tm.assert_frame_equal(actual2, exp)

    # convert_float and converters should be different but both accepted
    expected["StrCol"] = expected["StrCol"].apply(str)
    actual = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str})
    tm.assert_frame_equal(actual, expected)

    no_convert_float = float_expected.copy()
    no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
    actual = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str},
                        convert_float=False)
    tm.assert_frame_equal(actual, no_convert_float)
# Convert the CNTS dataset from Excel to a UTF-8 encoded CSV.
import pandas.io.excel as excel_io

frame = excel_io.read_excel("/home/burak/Downloads/CNTSDATA.xls")
frame.to_csv("/tmp/CNTSDATA.csv", encoding="utf-8")
# Fetch the zipped dataset once, then read the XLS inside it with pandas.
# NOTE(review): dataset_fname / dataset_url / download are defined earlier
# in this script, outside this excerpt.
if not os.path.exists(dataset_fname):
    download(dataset_url, server_fname=dataset_fname)

"""
The dataset is stored as an Excel spreadsheet (XLS). We can read it without
unzipping using the zipfile library.
"""
archive = zipfile.ZipFile(dataset_fname, 'r')
# Only one file in the zipfile we are reading from
# archive.open returns a file-like object - perfect for sending to pandas
file_handle = archive.open(archive.infolist()[0])

"""
To read the actual XLS file, we can use pandas.
"""
dataframe = read_excel(file_handle)
data = dataframe.values

"""
Only use the first 100 users for this example.
"""
# Column 0 is the user index; the remaining columns are joke ratings.
user_indices = data[:100, 0]
ratings = data[:100, 1:]
# Necessary because this is a view of the underlying data, want separate copy
true_ratings = np.copy(data[:100, 1:])

"""
In this dataset, any rating of 99. means that a joke was unrated. Since these
are floating point values, it is best to create the sparse array by hand. We
can get these indices with np.where.
"""
def ReadWorkBookIntoQueue(inputSubPlan, portMatrix):
    # Parse the sub-plan workbook together with the port-matrix workbook,
    # build one switch_dict per matching switch row, render its config and
    # enqueue it for the worker threads.
    next_service = False
    worksheets = {}
    ManagementIP = ''
    ManagementMask = ''
    ManagementVLAN = ''
    dataSubnet = ''
    Subnetmask = 0
    current_floor = 0
    current_IDF_ID = ''
    current_service = ''
    mgmtIPoctect = 0
    mgmtIPTracker = 0
    portmatrixwb = excel.ExcelFile(portMatrix)
    # Map the CLI config type to the service label used in the spreadsheet.
    if arguments.configtype.upper() == 'AL':
        configt = 'Data'
    elif arguments.configtype.upper() == 'WL':
        configt = 'Wireless'
    else:
        configt = 'Security Cameras'
    with excel.ExcelFile(inputSubPlan) as wb:
        for sname in wb.sheet_names:
            print('**** Sheet Name: '+ str(sname))
            #readsheet = excel.read_excel(wb,sheet_name=sname,converters={'Username':str,'Password':str,'Secret':str,'data_type':str,'Show_Commands':str,'Config_Commands':str})
            readsheet = excel.read_excel(wb,sheet_name=sname)
            df = DataFrame(data=readsheet, copy=True)
            worksheets[sname] = df.to_dict(orient='records')
            print('Finding management subnet and VLAN: \n')
            # First pass: locate the management subnet/VLAN row for the
            # selected service type.
            for rw in worksheets[sname]:
                if rw.get('Service') == 'Wired Switch Management' and configt == 'Data':
                    ManagementIP, ManagementMask = str(rw.get('Assigned Subnets')).split('/')
                    ManagementVLAN = rw.get('VLAN')
                    break
                elif rw.get('Service') == 'Wireless Switch Management' and configt == 'Wireless':
                    ManagementIP, ManagementMask = str(rw.get('Assigned Subnets')).split('/')
                    ManagementVLAN = rw.get('VLAN')
                    break
                else:
                    if rw.get('Service') == 'Security Switch Management' and configt == 'Security Cameras':
                        ManagementIP, ManagementMask = str(rw.get('Assigned Subnets')).split('/')
                        ManagementVLAN = rw.get('VLAN')
                        break
            # Second pass: build one switch_dict per row of the selected
            # service.  NOTE(review): ``x == x`` here is a NaN test — NaN
            # compares unequal to itself, so it is True only for filled cells.
            for rw in worksheets[sname]:
                if next_service and rw.get('Service') == rw.get('Service'):
                    break
                if rw.get('Service') == configt:
                    current_service = str(rw.get('Service')).strip()
                    print('found service: ', rw.get('Service'))
                if (current_service == configt):
                    print('processing next...')
                    switch_dict = {'jinjatemplate': '', 'hostname': '', 'IDFID': '', 'managementMask': '', 'ManagementIP': '', \
                        'datavlanname': '', 'datavlans': [], 'datasubnet': '', 'datamask': '', 'voicevlanname': '', \
                        'voicevlans': [], 'voicesubnet': '', 'voicemask': '', 'managementVLAN': '', 'managmentsubnet': '', \
                        'po': {'ponum': '', 'interfaces': {}}}
                    next_service = True
                    # find current floor and assign 10 to mgmtIPoctect for management last octect
                    if rw.get('Floor') == rw.get('Floor'):
                        current_floor = rw.get('Floor')
                        mgmtIPTracker = mgmtIPTracker + 1
                        mgmtIPoctect = (mgmtIPTracker * 10) + 1
                    # Gets IDFID base on Switch name
                    if rw.get('Switch') == rw.get('Switch'):
                        current_IDF_ID = str(rw.get('Switch').upper())
                        #current_IDF_ID = current_IDF_ID.replace('-','_')
                        current_IDF_ID = current_IDF_ID.replace('_AL1','')
                        current_IDF_ID = current_IDF_ID.replace('_AL2','')
                        current_IDF_ID = current_IDF_ID.replace('_AL3','')
                        current_IDF_ID = current_IDF_ID.replace('_WL1','')
                        current_IDF_ID = current_IDF_ID.replace('_SE1','')
                        current_IDF_ID = GenVlanName("",current_IDF_ID)
                    if rw.get('Assigned Subnets') == rw.get('Assigned Subnets'):
                        dataSubnet, Subnetmask = str(rw.get('Assigned Subnets')).split('/')
                    switch_dict['jinjatemplate'] = templatefile
                    switch_dict['hostname'] = str(rw.get('Switch')).upper()
                    switch_dict['IDFID'] = current_IDF_ID
                    switch_dict['datasubnet'] = dataSubnet.strip()
                    switch_dict['datamask'] = cidr_to_netmask(Subnetmask)
                    if configt == 'Data' or configt == 'Wireless':
                        switch_dict['datavlanname'] = GenVlanName(configt.upper() + '_',switch_dict['hostname'])
                    else:
                        temp_service, garbage = configt.split(" ")
                        switch_dict['datavlanname'] = GenVlanName(temp_service.upper() + '_',switch_dict['hostname'])
                    switch_dict['managmentsubnet'], garbage = str(ManagementIP).strip().split('.0',3)
                    switch_dict['managementMask'] = cidr_to_netmask(ManagementMask)
                    switch_dict['managementVLAN'] = str(ManagementVLAN).strip()
                    # Pick the port-matrix sheet matching the service type.
                    if current_service == 'Data':
                        portmatrixsh = portmatrixwb.parse(sheet_name='6807 Wired VSS')
                        print('Processing AL Port Matrix ...')
                    elif current_service == 'Security Cameras':
                        portmatrixsh = portmatrixwb.parse(sheet_name='6840 SEC VSS')
                        print('Processing SE Port Matrix ...')
                    else:
                        portmatrixsh = portmatrixwb.parse(sheet_name='6807 WL VSS')
                        print('Processing WL Port Matrix ...')
                    for pmxrow in portmatrixsh.to_records():
                        # apply this logic to AL tab in port matrix
                        if str(switch_dict['hostname']).upper().strip() == str(pmxrow[7]).upper().strip():
                            switch_dict['po']['ponum'] = pmxrow[5][2:].strip()
                            switch_dict['po']['interfaces'][pmxrow[8]] = pmxrow[1]
                            switch_dict['po']['interfaces'][pmxrow[19]] = pmxrow[13]
                        # apply this logic to fields on WL and SEC in port matrix
                        if str(switch_dict['hostname']).upper().strip() == str(pmxrow[6]).upper().strip():
                            switch_dict['po']['ponum'] = pmxrow[4][2:].strip()
                            switch_dict['po']['interfaces'][pmxrow[7]] = pmxrow[1]
                            switch_dict['po']['interfaces'][pmxrow[16]] = pmxrow[11]
                    # VLAN cells may hold several newline-separated values.
                    vl = str(rw.get('VLAN')).split('\n')
                    for vlan in vl:
                        vlantoadd = str(vlan)
                        switch_dict['datavlans'].append(str(int(float(vlantoadd))))
                    if configt == 'Data':
                        switch_dict['ManagementIP'] = switch_dict['managmentsubnet'] + '.' + \
                            switch_dict['datavlans'][0][len(switch_dict['datavlans'])-3:]
                    else:
                        switch_dict['ManagementIP'] = switch_dict['managmentsubnet'] + '.' + str(mgmtIPoctect)
                        mgmtIPoctect = mgmtIPoctect + 1
                    # find voice vlan and add to dictionary
                    for vc in worksheets[sname]:
                        if vc.get('Service') == vc.get('Service'):
                            current_service_vc = vc.get('Service')
                        if current_service_vc == 'Voice' and str(vc.get('Switch')).upper() == str(switch_dict['hostname']).upper():
                            voiceSubnet, Subnetmask = str(vc.get('Assigned Subnets')).split('/')
                            switch_dict['voicevlanname'] = GenVlanName(current_service_vc.upper() + '_',switch_dict['hostname'])
                            switch_dict['voicesubnet'] = voiceSubnet.strip()
                            switch_dict['voicemask'] = cidr_to_netmask(Subnetmask)
                            vl = str(vc.get('VLAN')).split('\n')
                            for vlan in vl:
                                vlantoadd = str(vlan)
                                switch_dict['voicevlans'].append(str(int(float(vlantoadd))))
                            break
                    print(switch_dict, '\n')
                    print('Generating Config ....> ')
                    GenerateConfig(switch_dict)
                    device_queue.put(switch_dict)
# Convert the CNTS dataset from Excel to a UTF-8 encoded CSV.
import pandas.io.excel as x

df = x.read_excel('/home/burak/Downloads/CNTSDATA.xls')
df.to_csv('/tmp/CNTSDATA.csv', encoding='utf-8')
""" dataset_fname = dataset_url.split("/")[-1] if not os.path.exists(dataset_fname): download(dataset_url, server_fname=dataset_fname) """ The dataset is stored as an Excel spreadsheet (XLS). We can read it without unzipping using the zipfile library. """ archive = zipfile.ZipFile(dataset_fname, 'r') # Only one file in the zipfile we are reading from # archive.open returns a file-like object - perfect for sending to pandas file_handle = archive.open(archive.infolist()[0]) """ To read the actual XLS file, we can use pandas. """ dataframe = read_excel(file_handle) data = dataframe.values """ Only use the first 100 users for this example. """ user_indices = data[:100, 0] ratings = data[:100, 1:] # Necessary because this is a view of the underlying data, want separate copy true_ratings = np.copy(data[:100, 1:]) """ In this dataset, any rating of 99. means that a joke was unrated. Since these are floating point values, it is best to create the sparse array by hand. We can get these indices with np.where. """ rated = np.where(ratings <= 10.) np.random.RandomState(1999)
def main():
    """Run show and/or config commands on every device listed in an Excel playbook.

    Each worksheet row describes one device (device_type, IP, credentials)
    plus newline-separated Show_Commands / Config_Commands cells.  Blank
    credential cells are filled in interactively.  Per device, the function
    connects, elevates to privileged mode if needed, runs the row's show
    commands and/or applies its config commands, and writes all output to a
    per-device QA log file.

    NOTE(review): throughout this function ``value != value`` is the pandas
    NaN test (NaN is the only value not equal to itself) — it detects an
    empty spreadsheet cell; ``value == value`` means the cell is populated.
    """
    # Read and parse arguments from the command line.
    arguments = getargs()
    worksheets = {}
    # Per-row working state: device credentials plus the parsed command lists.
    playbookinfo = {
        'creds': {
            'device_type': "",
            'ip': "",
            'username': "",
            'password': "",
            'secret': ""
        },
        'ShowCommands': [],
        'ConfigCommands': []
    }
    with excel.ExcelFile(arguments.inputfile) as wb:
        playbookinfo['ShowCommands'] = ''
        playbookinfo['ConfigCommands'] = ''
        for sname in wb.sheet_names:
            print('**** Sheet Name: ' + str(sname))
            # Force text-like columns to str so credentials and commands
            # never come back as floats from pandas' type inference.
            readsheet = excel.read_excel(wb, sheet_name=sname,
                                         converters={'Username': str,
                                                     'Password': str,
                                                     'Secret': str,
                                                     'data_type': str,
                                                     'Show_Commands': str,
                                                     'Config_Commands': str})
            df = DataFrame(data=readsheet, copy=True)
            # One dict per spreadsheet row.
            worksheets[sname] = df.to_dict(orient='records')
            for rw in worksheets[sname]:
                playbookinfo['creds']['device_type'] = rw.get('device_type')
                playbookinfo['creds']['ip'] = rw.get('IP')
                # If the username field in the playbook is blank (NaN or
                # empty string), prompt for it interactively.
                if rw.get('Username') != rw.get('Username') or rw.get(
                        'Username').strip() == '':
                    playbookinfo['creds']['username'] = getusername()
                else:
                    playbookinfo['creds']['username'] = rw.get('Username')
                print('\nLogin into: ' + playbookinfo['creds']['ip'] + ' ...')
                # Likewise prompt when the password field is blank.
                if rw.get('Password') != rw.get('Password') or rw.get(
                        'Password').strip() == '':
                    playbookinfo['creds']['password'] = getpassword(
                        playbookinfo['creds']['username'])
                else:
                    playbookinfo['creds']['password'] = rw.get('Password')
                # If the secret field is blank, offer to enter one;
                # otherwise take it from the sheet.
                if rw.get('Secret') != rw.get('Secret') or rw.get(
                        'Secret') == '':
                    if input(
                            'do you want to enter enabled/secret password(Y/N): '
                    ).upper() == 'Y':
                        playbookinfo['creds']['secret'] = getpassword(
                            'secret/enabled')
                else:
                    playbookinfo['creds']['secret'] = rw.get('Secret')
                # Command cells hold one command per line.
                playbookinfo['ShowCommands'] = str(
                    rw.get('Show_Commands')).split('\n')
                playbookinfo['ConfigCommands'] = str(
                    rw.get('Config_Commands')).split('\n')
                conn = connectToDevice(playbookinfo['creds'])
                resultprompt = conn.find_prompt()
                # A prompt not ending in '#' means user (unprivileged) mode.
                if resultprompt[len(resultprompt) - 1] != "#":
                    print(
                        "----> Changing from User mode to privilege mode <----\n"
                        + resultprompt)
                    conn.enable()
                    resultprompt = conn.find_prompt()
                    print(resultprompt)
                else:
                    print("----> Already in privilege mode <----\n" +
                          resultprompt)
                qalog = openlogfile(resultprompt, playbookinfo['creds']['ip'])
                # Show-only row: Show_Commands populated and non-empty,
                # Config_Commands blank (NaN).
                if (rw.get('Show_Commands') == rw.get('Show_Commands')) and \
                        len(str(rw.get('Show_Commands')).strip()) > 0 and rw.get('Config_Commands') != rw.get('Config_Commands'):
                    print(
                        '*****************************************************\n' +
                        '*** Running show commands ***\n' +
                        '*****************************************************\n')
                    logshowcommands(qalog, conn, playbookinfo['ShowCommands'])
                # Config row: Config_Commands populated and non-empty.
                if (rw.get('Config_Commands') == rw.get('Config_Commands')) and \
                        len(str(rw.get('Config_Commands')).strip()) > 0:
                    print(
                        '*****************************************************\n' +
                        '*** Entering config mode ***\n' +
                        '*****************************************************\n')
                    # Snapshot device state before applying changes.
                    print(
                        '*****************************************************\n' +
                        '*** Running show commands - before changes ***\n' +
                        '*****************************************************\n')
                    qalog.write(
                        '*****************************************************\n' +
                        '*** Running show commands - before changes ***\n' +
                        '*****************************************************\n')
                    # Use the row's show commands when given, else the
                    # module-level default set.
                    if rw.get('Show_Commands') == rw.get('Show_Commands'):
                        logshowcommands(qalog, conn,
                                        playbookinfo['ShowCommands'])
                    else:
                        logshowcommands(qalog, conn, SHOWCOMMANDS)
                    configresults = conn.send_config_set(
                        config_commands=playbookinfo['ConfigCommands'])
                    print(
                        '*****************************************************\n' +
                        '*** Configurations Changes ***\n' +
                        '*****************************************************\n')
                    print(configresults)
                    qalog.write(get_logheader('Configuration changes'))
                    qalog.write(configresults + '\n')
                    # Optionally persist running config to startup config.
                    if arguments.w.upper() == 'Y':
                        print(
                            '*****************************************************\n' +
                            '*** Writing Running Config to Startup Config ***\n' +
                            '*****************************************************\n')
                        qalog.write(
                            '*****************************************************\n' +
                            '*** Writing Running Config to Startup Config ***\n' +
                            '*****************************************************\n')
                        configresults = conn.send_command('write mem')
                        print(configresults)
                        qalog.write(configresults)
                    # Snapshot device state after the changes.
                    print(
                        '*****************************************************\n' +
                        '*** Running show commands - after configurations ***\n' +
                        '*****************************************************\n')
                    qalog.write(
                        '*****************************************************\n' +
                        '*** Running show commands - after configurations ***\n' +
                        '*****************************************************\n')
                    if rw.get('Show_Commands') == rw.get('Show_Commands'):
                        logshowcommands(qalog, conn,
                                        playbookinfo['ShowCommands'])
                    else:
                        logshowcommands(qalog, conn, SHOWCOMMANDS)
                # Release the log file and the device session for this row.
                qalog.close()
                conn.disconnect()
    print('\n**** Task(s) Completed with no errors ****')
def main():
    """Re-home the 'Metering' ports of each listed access switch onto a VLAN.

    Reads "Cellar metering ports.xlsx"; each row names an access (IDF)
    switch and its upstream core switch (IP / CoreIP), the port range to
    move (Metering), the VLAN number, and the trunk interfaces on both ends
    (IDFTrunk / CoreTrunk).  Per row the function logs a before snapshot of
    both switches, creates/assigns the VLAN on the access switch, allows it
    on both trunks, saves the access-switch config, and logs an after
    snapshot — everything goes to one QA log file per device.
    """
    worksheets = {}
    # NOTE(review): hard-coded enable secret checked into source — this
    # credential should be supplied at runtime, not committed.
    playbookinfo = {
        'creds': {
            'device_type': "",
            'ip': "",
            'username': "",
            'password': "",
            'secret': "ScHn31d3r"
        },
        'credsCore': {
            'device_type': "",
            'ip': "",
            'username': "",
            'password': "",
            'secret': ""
        },
        # Commands used to snapshot switch state before and after changes.
        'listofcommands':
        ["show run", "show vlan", "show int status", "show int trunk"]
    }
    with excel.ExcelFile("Cellar metering ports.xlsx") as wb:
        for sname in wb.sheet_names:
            print("print sheetname: ", sname)
            # Keep credential-like columns as strings.
            readsheet = excel.read_excel(wb,
                                         sheet_name=sname,
                                         converters={
                                             'Username': str,
                                             'Password': str,
                                             'Secret': str,
                                             'data_type': str
                                         })
            df = DataFrame(data=readsheet, copy=True)
            # One dict per spreadsheet row.
            worksheets[sname] = df.to_dict(orient='records')
            # One interactive credential prompt per sheet; the same login is
            # reused for both the access switch and its core.
            playbookinfo['creds']['username'] = getusername()
            playbookinfo['creds']['password'] = getpassword(
                playbookinfo['creds']['username'])
            playbookinfo['credsCore']['username'] = playbookinfo['creds'][
                'username']
            playbookinfo['credsCore']['password'] = playbookinfo['creds'][
                'password']
            for rw in worksheets[sname]:
                playbookinfo['creds']['device_type'] = 'cisco_ios'
                playbookinfo['creds']['ip'] = rw.get('IP')
                playbookinfo['credsCore']['ip'] = rw.get('CoreIP')
                playbookinfo['credsCore']['device_type'] = playbookinfo[
                    'creds']['device_type']
                portrange = str(rw.get('Metering'))
                vlan = str(rw.get('VLAN'))
                conn = connectToDevice(playbookinfo['creds'])
                resultprompt = conn.find_prompt()
                print('Processing device: ' + playbookinfo['creds']['ip'] +
                      '\n')
                # A prompt not ending in '#' means user (unprivileged) mode.
                if resultprompt[len(resultprompt) - 1] != "#":
                    print("Changing from User mode to privilege mode\n" +
                          resultprompt)
                    conn.enable()
                    resultprompt = conn.find_prompt()
                    print(resultprompt)
                else:
                    print("Already in privilege mode\n" + resultprompt)
                qalog = openlogfile(resultprompt, playbookinfo['creds']['ip'])
                qalog.write(get_logheader('Port(s) VLAN change - ' +
                                          portrange))
                # Log config before changes: access switch first, then core.
                qalog.write('********** BEFORE CHANGES ***********')
                for cmd in playbookinfo['listofcommands']:
                    commandresults = conn.send_command(cmd)
                    qalog.write(get_logheader(cmd))
                    qalog.write(commandresults + '\n\n\n')
                connC = connectToDevice(playbookinfo['credsCore'])
                resultpromptC = connC.find_prompt()
                for cmd in playbookinfo['listofcommands']:
                    commandresults = connC.send_command(cmd)
                    qalog.write(get_logheader(cmd))
                    qalog.write(commandresults + '\n\n\n')
                print(resultprompt)
                # Create the VLAN and move the metering port range onto it,
                # sent as one newline-separated config script.
                cmd = 'vlan ' + vlan + '\nname Metering-VLAN\n' + 'interface range ' + portrange + '\n' + 'switchport mode access\nswitchport access vlan ' \
                    + vlan + '\ndescription *** Metering Port ***\nend\n'
                commandresults = conn.send_config_set(config_commands=cmd)
                qalog.write(commandresults + '\n\n\n')
                # Allow the new VLAN on the uplink trunks on both ends.
                cmd = 'interface ' + rw.get(
                    'IDFTrunk'
                ) + '\n' + 'switchport trunk allowed vlan add ' + vlan + '\n'
                cmdC = 'interface ' + rw.get(
                    'CoreTrunk'
                ) + '\n' + 'switchport trunk allowed vlan add ' + vlan + '\n'
                commandresults = conn.send_config_set(config_commands=cmd)
                print(resultprompt, '\n', commandresults)
                print('Changing trunk config in Core: ' +
                      playbookinfo['credsCore']['ip'] + '\n')
                commandresultsC = connC.send_config_set(config_commands=cmdC)
                print(connC.find_prompt() + '\n' + commandresultsC + '\n')
                qalog.write(commandresults + '\n\n\n')
                qalog.write(commandresultsC + '\n\n\n')
                # Persist the access-switch change.
                commandresults = conn.send_command('wr mem')
                qalog.write(get_logheader('Writing config changes'))
                qalog.write(commandresults + '\n\n\n')
                # Log config after changes: access switch first, then core.
                qalog.write('********** AFTER CHANGES ***********')
                for cmd in playbookinfo['listofcommands']:
                    commandresults = conn.send_command(cmd)
                    qalog.write(get_logheader(cmd))
                    qalog.write(commandresults + '\n\n\n')
                for cmd in playbookinfo['listofcommands']:
                    commandresults = connC.send_command(cmd)
                    qalog.write(get_logheader(cmd))
                    qalog.write(commandresults + '\n\n\n')
                # Release the log and both device sessions for this row.
                qalog.close()
                conn.disconnect()
                connC.disconnect()
                print('Task Complete For: ' +
                      resultprompt[:len(resultprompt) - 1] + '\n\n')
def load_from_excel(excel_file, sheet, token):
    """Ingest one worksheet of *excel_file* via load_dataframe.

    The sheet's Topic/Selector/Message columns are renamed to the
    anchor/label/association names used downstream; *sheet* and *token*
    are passed through to load_dataframe unchanged.
    """
    rename_map = {
        'Topic': 'anchor',
        'Selector': 'label',
        'Message': 'association',
    }
    frame = read_excel(excel_file, sheet, index_col=None)
    frame = frame.rename(columns=rename_map)
    load_dataframe(frame, sheet, token)
columns = list(psheet.iloc[start_row_num+1,:]) titles.append(psheet.iloc[start_row_num, 0]) end_row_num = start_row_num+1 while row_counts[end_row_num] > 0: end_row_num += 1 table = pd.DataFrame(psheet.iloc[start_row_num+2:end_row_num,:]) table.index = range(end_row_num-start_row_num-2) # No title, no headers table.columns = columns table[columns[0]] = table[columns[0]].fillna(method="ffill") table.replace("-", np.NaN, inplace=True) if extend: table["Geography"] = titles[-1] tables.append(table) return sheet_header, dict(zip(titles, tables)) pwb = read_excel("data/gpearnextime.xlsx", sheetname=None) ws = pwb["8. GPMS Distribution"] h, tt = extract_tables(ws) if __name__ == "__main__": import sys # adul-crit-care-data-eng-apr-13-mar-14-tab.xls pd.options.display.float_format = '{:,.0f}'.format pwb = read_excel("data/gpearnextime.xlsx", sheetname=None) wb = xl.load_workbook("data/gpearnextime.xlsx") #pwb = read_excel("data/adul-crit-care-data-eng-apr-13-mar-14-tab.xlsx", sheetname=None) #wb = xl.load_workbook("data/adul-crit-care-data-eng-apr-13-mar-14-tab.xlsx") tot_tbls = 0 all_tables = {} for sheet_name in wb.sheetnames: if wb.get_sheet_by_name(sheet_name).sheet_state == "hidden":
"""Load the e17 data sheet and compute the mean age and mean score."""
from pandas.io.excel import read_excel
from numpy import mean

# Pull the named worksheet into a DataFrame.
d = read_excel("e17data.xlsx", "Sheet 1 - Table 1")
# Column-wise averages of the two measures of interest.
m1, m2 = mean(d["age"]), mean(d["score"])