def test():
    """Write two small frames to 'foo.xlsx' as two sheets.

    BUG FIX: the original called ``DataFrame.to_excel('foo.xlsx', ...)``
    twice; each call rewrites the whole workbook, so only 'Data 1'
    survived.  Writing both frames through a single ExcelWriter keeps
    both sheets in the file.
    """
    df0 = DataFrame(np.arange(3))
    df1 = DataFrame(np.arange(2))
    with ExcelWriter('foo.xlsx') as writer:
        df0.to_excel(writer, 'Data 0')
        df1.to_excel(writer, 'Data 1')
    return
def test_freeze_panes(self, engine, ext):
    """Frame written with freeze_panes round-trips unchanged (gh-15160)."""
    original = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
    original.to_excel(self.path, "Sheet1", freeze_panes=(1, 1))
    roundtripped = pd.read_excel(self.path, index_col=0)
    tm.assert_frame_equal(roundtripped, original)
def test_fenci():
    # Load nine pickled word-frequency distributions, plot the top-30
    # keywords of each category as a horizontal bar chart, and write every
    # category's full table to one sheet of Data/keys.xlsx.
    # NOTE: Python 2 only -- uses file(), str.decode and print statements.
    dfs = []
    for i in range(0, 9):
        f = file('Data/ftags_{}.pkl'.format(i), 'rb')
        fdist = pickle.load(f)
        #fdist.plot(50)
        # Columns are keyword / count (the literals are data, kept as-is).
        df = DataFrame(fdist.items(), columns=['关键词', '计数'])
        # Sort by the count column, descending (legacy sort_index(by=...) API).
        df = df.sort_index(by='计数', ascending=False)
        df.index = range(len(df))
        # Keep the top 30 and reverse so the largest bar is drawn on top.
        df_plt = df[:30]
        df_plt = df_plt[::-1]
        #df_plt['关键词'].apply(lambda x : x.encode('utf8'))
        print df_plt.head()
        df_plt.plot(kind='barh', x=df_plt['关键词'], title=classifies[i])
        #plt.show()
        filePath = 'Data/{}.png'.format(classifies[i])
        # Decode the byte path so matplotlib gets a unicode filename (py2).
        str_name_f = filePath.decode("utf8")
        plt.savefig(str_name_f, dpi=100)
        dfs.append((classifies[i],df))
        #print df[df[1] > 1]
        f.close()
    print 'end'
    # One sheet per category, named after the category label.
    with pd.ExcelWriter('Data/keys.xlsx') as writer:
        for key, df in dfs:
            print key
            df.to_excel(writer, sheet_name=key, index=False)
def LaGouSpiderWithKeyWord(position, city):
    """Scrape lagou.com job postings for *position* in *city* and write
    all pages of results to lagou.xls."""
    # Get the total number of result pages first.
    pageCount = SearchPageCount(position, city)
    if pageCount == 0:
        print('抱歉!在您搜索的城市中没有您要找的职位')
        return
    totaldata = DataFrame().T
    for i in range(0, pageCount):
        url = 'http://www.lagou.com/jobs/positionAjax.json?'
        params = {'city': city, 'kd': position, 'pn': i+1}
        url += parse.urlencode(params)
        data = request.urlopen(url).read()
        # Parse the JSON payload and take the result list.
        jsondata = json.loads(str(data, encoding='utf-8', errors='ignore'))['content']['result']
        for t in list(range(len(jsondata))):
            # Flatten the label list into one '-'-joined string column.
            jsondata[t]['companyLabelListTotal'] = '-'.join(jsondata[t]['companyLabelList'])
            jsondata[t].pop('companyLabelList')
            if t == 0:
                rdata = DataFrame(Series(data=jsondata[t])).T
            else:
                rdata = pd.concat([rdata,DataFrame(Series(data=jsondata[t])).T])
        # NOTE(review): if a page returns an empty result list, `rdata`
        # is never (re)bound here and this either raises NameError or
        # re-appends the previous page -- confirm whether empty pages occur.
        totaldata = pd.concat([totaldata, rdata])
        print('正在解析第{0}页...'.format(i+1))
    totaldata.to_excel('lagou.xls', sheet_name='sheet1')
def plot_stuff():
    # Build a community-by-community score matrix from CSV files, print it
    # as a table, save one workbook per method, and finally write a
    # cross-method comparison of the mean scores.
    pd_list = {}
    compare_tl = []
    compare_tl_head = []
    for vars in list_communities():
        # Load each CSV as a name-sorted (descending) DataFrame keyed by
        # its file stem.
        for var in vars:
            pd_list.update({var.split("/")[-1].split(".")[0]: DataFrame(
                sorted(read_csv(var)[["Name", "G"]].values,
                       key=lambda x: x[0], reverse=True))})
        N = len(pd_list.keys())  # Find number of elements
        stats = np.zeros((N, N))  # Create a 2-D Array to hold the stats
        keys = sorted(pd_list, reverse=True)  # Find data sets (Sort alphabetically, backwards)
        for idx, key in enumerate(keys):  # Populate 2-D array
            for i, val in enumerate(pd_list[key][1].values):
                if not i == idx:  # Ensure self values are set to zero
                    stats[i, idx] = val
        stats = DataFrame(stats, columns=keys, index=keys)
        # stats["Mean"] = stats.median(axis=0)
        # set_trace()
        stats["Mean"] = find_mean(stats)
        stats["Std"] = find_std(stats)
        stats = stats.sort_values(by="Mean", axis=0, ascending=False, inplace=False)
        print(tabulate(stats, showindex=True, headers=stats.columns, tablefmt="fancy_grid"))
        print("\n")
        # NOTE(review): `var` here is the last value of the inner loop --
        # the method name is derived from the final file's path; confirm
        # all files in `vars` share the same parent directories.
        save_path = os.path.abspath("/".join(var.split("/")[:-2]))
        method = var.split("/")[-2]+".xlsx"
        stats.to_excel(os.path.join(save_path, method))
        compare_tl.append(stats.sort_index(inplace=False)["Mean"].values.tolist())
        compare_tl_head.append(method)
        # set_trace()
    # Cross-method comparison table: one column per method.
    compare_tl = DataFrame(np.array(compare_tl).T, columns=compare_tl_head,
                           index=stats.index.sort_values())
    save_path_2 = os.path.join(os.path.abspath("/".join(var.split("/")[:-3])),
                               os.path.abspath("".join(var.split("/")[-3]))+".xlsx")
    compare_tl.to_excel(save_path_2)
def write_to_excel(data):
    """Pivot row-oriented *data* into columns and write it to test.xlsx.

    Each element of *data* is one row whose cells are, in order:
    # Model the data as such:
    # List [
    #  0 Principle Account,
    #  1 Component Principle Account,
    #  2 Endowment Name,
    #  3 Long Term,
    #  4 Temp
    #  5 Misc
    # ]
    """
    variables = [[],[],[],[],[],[],[],[]]
    col_names = ['Endowment Name', 'Component Principle Account',
                 'Principle Account', 'Long Term', 'Temp', 'Misc',
                 'Misc2', 'Misc2']
    for i in data:
        for x, j in enumerate(i):
            # BUG FIX: was `if x > len(variables)`, which let
            # x == len(variables) fall through and raise IndexError on
            # variables[x] for rows longer than the preallocated 8 cells.
            if x >= len(variables):
                variables.append([])
            variables[x].append(j)
    df = DataFrame(variables)
    df = df.T
    df.columns = col_names
    df.to_excel("test.xlsx", sheet_name="Data", index=False)
def chart_templates():
    # Tally play and purchase events per template from the Mongo-style
    # `db` handle and write a summary workbook with a buy-to-play ratio.
    index = get_template_list()
    index = list(set(index))  # de-duplicate template names
    t_play = []
    t_buy = []
    t_producer = []
    index.sort()
    for template in index:
        buy = 0.0
        play = 0.0
        # Default producer from the template record; overridden by the
        # last event's producer if any events exist.
        producer = db.templates.find_one({'name':template})['producer']
        for event in db.events.find({'template':template}):
            producer = event['producer']
            if event['event'] == 'play':
                play +=1
            elif event['event'] == 'purchase':
                buy +=1
        t_producer .append(producer)
        t_play.append(play)
        t_buy.append(buy)
    data = { 'play':t_play, 'buy':t_buy, 'producer':t_producer }
    chart = DataFrame(data,index=index,columns=['producer','play','buy'])
    # Buy-to-play conversion percentage (division by zero yields inf/NaN).
    chart['b2p_percent'] = (chart.buy/chart.play)*100
    # NOTE(review): sort_index() returns a new frame; the result is
    # discarded here, so the output is NOT sorted -- confirm intent.
    chart.sort_index()
    chart.to_excel('templates_analysis.xls')
def test_importItems(self):
    # Form should reject mis-named fields (NameError) and malformed
    # options (ValueError), and accept item files in both csv and xlsx.
    wrongFields = [{"a": "What is your gender?", "b": 0.7, "c": "radio",
                    "d": 0.3, "e": "Male, Female, Other", "f": 'vert'}]
    wrongOptions = [{"questionText": "What is your gender?",
                     "questionWidth": 0.7, "type": "radio",
                     "responseWidth": 0.3, "options": "Other",
                     "layout": 'vert', "index": 0}]
    # Materialise the valid questions as both file formats.
    df = DataFrame(self.questions)
    df.to_excel(fileName_xlsx, index=False)
    df.to_csv(fileName_csv, index=False)
    # Check wrong field error
    with pytest.raises(NameError):
        self.survey = Form(self.win, items=wrongFields, size=(1.0, 0.3),
                           pos=(0.0, 0.0), autoLog=False)
    # Check options for list of dicts
    with pytest.raises(ValueError):
        self.survey = Form(self.win, items=wrongOptions, size=(1.0, 0.3),
                           pos=(0.0, 0.0), autoLog=False)
    # Check csv
    self.survey = Form(self.win, items=fileName_csv, size=(1.0, 0.3),
                       pos=(0.0, 0.0), autoLog=False)
    # Check Excel
    self.survey = Form(self.win, items=fileName_xlsx, size=(1.0, 0.3),
                       pos=(0.0, 0.0), randomize=False, autoLog=False)
def test_to_excel_unicode_filename(self):
    # A frame written to a unicode-named file with float_format='%.2f'
    # should read back with the rounded values.
    _skip_if_no_xlrd()
    ext = self.ext
    filename = u('\u0192u.') + ext
    # Probe whether the filesystem accepts unicode names at all.
    try:
        f = open(filename, 'wb')
    except UnicodeEncodeError:
        raise nose.SkipTest('no unicode file names on this system')
    else:
        f.close()
    df = DataFrame([[0.123456, 0.234567, 0.567567],
                    [12.32112, 123123.2, 321321.2]],
                   index=['A', 'B'], columns=['X', 'Y', 'Z'])
    with ensure_clean(filename) as filename:
        df.to_excel(filename, 'test1', float_format='%.2f')
        reader = ExcelFile(filename)
        rs = reader.parse('test1', index_col=None)
        # Expected values after two-decimal formatting.
        xp = DataFrame([[0.12, 0.23, 0.57],
                        [12.32, 123123.20, 321321.20]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])
        tm.assert_frame_equal(rs, xp)
def to_excel(self, outfile, units = ''):
    """Export timing info to excel file. Returns location of exported file.
    Automatically timestamps filename in format: yyyy-mm-dd-hh:mm:ss

    Parameters
    ----------
    excel_file: the name of the file to export from. e.g. file.xls
    units: the units to export the data in; one of ['min', 'sec'];
        defaults to the object's own units when empty.

    Raises
    ------
    DataError
        If no data or units have been recorded yet.
    """
    # BUG FIX: `self.data == None` compares elementwise (or raises) when
    # data is array-like; identity comparison is what is intended.
    if self.data is None or self.units is None:
        raise DataError('Cannot export; no data!')
    if units == '':
        units = self.units
    try:
        filename = timestamp(outfile)
        df = DataFrame(self.get_data(units),
                       columns = ['frame', 'start time', 'duration', 'stop time'])
        df.to_excel(filename, sheet_name = 'Sheet1', index = False)
        return filename
    except IOError:
        # Parenthesized so it works as a statement in py2 and a call in py3.
        print('Whoops')
def test_to_excel_unicode_filename(self):
    # Same round-trip as the xlrd variant, but exercised for both the
    # xls and xlsx writers.
    _skip_if_no_excelsuite()
    for ext in ["xls", "xlsx"]:
        filename = u"\u0192u." + ext
        # Probe whether the filesystem accepts unicode names at all.
        try:
            f = open(filename, "wb")
        except UnicodeEncodeError:
            raise nose.SkipTest("no unicode file names on this system")
        else:
            f.close()
        df = DataFrame(
            [[0.123456, 0.234567, 0.567567],
             [12.32112, 123123.2, 321321.2]],
            index=["A", "B"],
            columns=["X", "Y", "Z"],
        )
        with ensure_clean(filename) as filename:
            df.to_excel(filename, "test1", float_format="%.2f")
            reader = ExcelFile(filename)
            rs = reader.parse("test1", index_col=None)
            # Expected values after two-decimal formatting.
            xp = DataFrame(
                [[0.12, 0.23, 0.57],
                 [12.32, 123123.20, 321321.20]],
                index=["A", "B"], columns=["X", "Y", "Z"]
            )
            tm.assert_frame_equal(rs, xp)
class Excel(object):
    # asv benchmark: time reading and writing a 2000-row mixed frame
    # through each available Excel engine.
    goal_time = 0.2
    params = ['openpyxl', 'xlsxwriter', 'xlwt']
    param_names = ['engine']

    def setup(self, engine):
        """Prepare one pre-written in-memory workbook (for the read
        benchmark) and one empty writer (for the write benchmark)."""
        N = 2000
        C = 5
        self.df = DataFrame(np.random.randn(N, C),
                            columns=['float{}'.format(i) for i in range(C)],
                            index=date_range('20000101', periods=N, freq='H'))
        self.df['object'] = tm.makeStringIndex(N)
        # Write once into a BytesIO so time_read_excel measures reading only.
        self.bio_read = BytesIO()
        self.writer_read = ExcelWriter(self.bio_read, engine=engine)
        self.df.to_excel(self.writer_read, sheet_name='Sheet1')
        self.writer_read.save()
        self.bio_read.seek(0)
        self.bio_write = BytesIO()
        self.bio_write.seek(0)
        self.writer_write = ExcelWriter(self.bio_write, engine=engine)

    def time_read_excel(self, engine):
        # Benchmark: parse the pre-built workbook.
        read_excel(self.bio_read)

    def time_write_excel(self, engine):
        # Benchmark: serialize the frame and flush the writer.
        self.df.to_excel(self.writer_write, sheet_name='Sheet1')
        self.writer_write.save()
def OnExit(self):
    #Does all of the initialising and saves the flow data
    intersectionFlowRates = {}
    #Loop through all of the junctions
    for i in self._intersectionFlow:
        #Loop through all of the intervals
        localFlowRates = []
        for x in range(0,len(self._intersectionFlow[i])):
            # Convert per-interval counts to vehicles/hour.
            localFlowRates.append(3600*self._intersectionFlow[i][x]/constants.CONST_MEASUREMENT_INTERVAL)
        intersectionFlowRates[i] = localFlowRates
    # NOTE(review): relies on `i` leaking from the loop above -- uses the
    # LAST junction's interval count to build the time axis; assumes all
    # junctions have the same number of intervals.
    intersectionFlowRates['Time'] = range(0,len(intersectionFlowRates[i])*constants.CONST_MEASUREMENT_INTERVAL,constants.CONST_MEASUREMENT_INTERVAL)
    df = DataFrame(intersectionFlowRates)
    folderName = constants.CONST_EXPERIMENT_NAME
    newFolderurl = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) + '\\MyProgram\\Tests\\Experiments\\' + folderName
    # Find an unused experiment folder name by appending _0, _1, ...
    counter = 0
    while os.path.exists(newFolderurl):
        newFolderurl = os.path.abspath(os.path.join(os.getcwd(),os.pardir)) + '\\MyProgram\\Tests\\Experiments\\' + folderName + '_' + str(counter)
        counter +=1
    os.makedirs(newFolderurl)
    df.to_excel(newFolderurl + '\\' + constants.CONST_FLOW_FILE_NAME, sheet_name='sheet1', index=False)
    #Saves the settings file as well in the same folder
    shutil.copyfile(self._configFile, newFolderurl + '\\' + self._configFile.split("\\")[-1])
def chart_voice_format():
    # Cross-tabulate play/purchase counts per (voice, format) pair and
    # write the result as a MultiIndex-column spreadsheet.
    voices = get_voice_list()
    formats = get_format_list()
    data = []
    for voice in voices:
        v_chart = []
        for format in formats:
            play = 0.0
            buy = 0.0
            for event in database.db.events.find({'format':format,'voices':voice}):
                if event['event'] == 'play':
                    play += 1
                elif event['event'] == 'purchase':
                    buy +=1
            # Two columns per format: play count then buy count.
            v_chart.append(play)
            v_chart.append(buy)
        data.append(v_chart)
    # Build the two-level column header: (format, play/buy).
    f_list = []
    k_list = []
    for format in get_format_list():
        f_list.append(format)
        f_list.append(format)
        k_list.append('play')
        k_list.append('buy')
    col = pd.MultiIndex.from_arrays([f_list,k_list],names=['format','type'])
    chart = DataFrame(data,columns=col,index=get_voice_list())
    chart.to_excel('files/voice_format_analysis.xls')
    return chart
def test_excelfile_fspath(self):
    """os.fspath() on an ExcelFile yields the underlying file path."""
    with tm.ensure_clean('foo.xlsx') as path:
        DataFrame({"A": [1, 2]}).to_excel(path)
        xl = ExcelFile(path)
        assert os.fspath(xl) == path
def to_excel(self, puts_path='/tmp/puts.xls', calls_path='/tmp/calls.xls'):
    # Export the puts and calls tables to two separate .xls files.
    # NOTE: Python 2 only (print statements).
    dataframe = DataFrame(data=self.puts)
    dataframe.to_excel(puts_path)
    print 'Puts saved at %s' % (puts_path)
    # The local is reused for the calls table.
    dataframe = DataFrame(data=self.calls)
    dataframe.to_excel(calls_path)
    print 'Calls saved at %s' % (calls_path)
def test_inf_roundtrip(self, engine, ext):
    """+/-inf values survive a write/read round-trip."""
    frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
    frame.to_excel(self.path, "test1")
    recons = pd.read_excel(ExcelFile(self.path), "test1", index_col=0)
    tm.assert_frame_equal(frame, recons)
def generate_excel(self):
    # Dump self.generate_data() to data.xlsx, or report that nothing was
    # found.  NOTE: Python 2 only (print statements).
    data = self.generate_data()
    if data:
        dataframe = DataFrame(data)
        dataframe.to_excel('data.xlsx', sheet_name='sheet1', index=False)
        print "Generated Excel Sheet with name data.xlsx"
    else:
        print "No data found"
def test_excel_multiindex_index(self, ext):
    # MultiIndex as index works so assert no error #9794
    mi = MultiIndex.from_tuples([('site', ''),
                                 ('2014', 'height'),
                                 ('2014', 'weight')])
    frame = DataFrame(np.random.randn(3, 10), index=mi)
    with ensure_clean(ext) as path:
        frame.to_excel(path, index=False)
def test_inf_roundtrip(self):
    """+/-inf values survive a write/read round-trip (legacy API)."""
    _skip_if_no_xlrd()
    df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
    with ensure_clean(self.ext) as path:
        df.to_excel(path, 'test1')
        recons = ExcelFile(path).parse('test1')
        tm.assert_frame_equal(df, recons)
def _write_columns_to_excel(c1, c2, c3):
    """Pad the ticket columns to the weekday list and write a timestamped
    work-log workbook under output/."""
    weekday_list = _build_date_list_weeks()
    padded_c2, padded_c3 = _pad_other_lists(weekday_list, c1, c2, c3)
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    file_name = 'output/work_log_{}.xlsx'.format(stamp)
    frame = DataFrame({
        'Date': weekday_list,
        'Ticket Name': padded_c2,
        'Ticket Data': padded_c3,
    })
    # print(frame)
    frame.to_excel(file_name, sheet_name='sheet1', index=False)
def inicio(base,x=1, p=5100):
    """Scrape STF case records numbered x..p-1 for class *base* via
    stfWorm and export all collected fields to teste1.xlsx.

    Cases for which stfWorm returns False are skipped.
    """
    tipo=[]
    adi_n=[]
    origem=[]
    relator=[]
    dispositivo=[]
    entrada=[]
    distribuicao=[]
    requerente=[]
    requerido=[]
    emenda=[]
    requerentep=[]
    adi_error=[]
    fundamenta=[]
    rliminar=[]
    dpliminar=[]
    rfinal=[]
    dfinal=[]
    dmliminar=[]
    dmfinal=[]
    incidentes=[]
    ementa=[]
    index=[]
    for n in range(x, p):
        # Renamed from `list`, which shadowed the builtin.
        row = stfWorm(base, n)
        if (row==False):
            continue
        tipo.append(base)
        adi_n.append(row[0])
        origem.append(row[1])
        relator.append(row[2])
        entrada.append(row[3])
        distribuicao.append(row[4])
        requerente.append(row[5])
        requerido.append(row[6])
        dispositivo.append(row[7])
        fundamenta.append(row[8])
        rliminar.append(row[9])
        dpliminar.append(row[10])
        rfinal.append(row[11])
        dfinal.append(row[12])
        dmliminar.append(row[13])
        dmfinal.append(row[14])
        incidentes.append(row[15])
        ementa.append(row[16])
        index.append(row[17])
        print(row[7])
        # BUG FIX: the original tested
        #     ('Emenda' or 'emenda' or 'EMENDA') in row[7]
        # which short-circuits to just 'Emenda' in row[7]; all three
        # spellings must be checked.
        if row[7] is not None and any(
                term in row[7] for term in ('Emenda', 'emenda', 'EMENDA')):
            emenda.append(1)
        else:
            emenda.append(0)
        requerentep.append(requerente_p(row[5]))
        print(n)
        print(adi_n, origem, relator, entrada, distribuicao, requerente,
              requerido, dispositivo, emenda, requerentep)
    df = DataFrame({'01_tipo': tipo, '02_numero': adi_n, '03_Origem': origem,
                    '04_relator': relator, '05_data_entrada': entrada,
                    '06_distribuicao': distribuicao, '07_requerente': requerente,
                    '08_requerido': requerido, '09_dispositivo_legal': dispositivo,
                    '10_emenda': emenda, '11_requerente_p': requerentep,
                    '12_fundamentacao': fundamenta, '13_resultadoliminar': rliminar,
                    '14_decisao_plenaria_liminar': dpliminar,
                    '15_resultado_final': rfinal, '16_decisao_final': dfinal,
                    '17_decisao_monocratica_liminar': dmliminar,
                    '18_decisao_monocratica_final':dmfinal,
                    '19_incidentes':incidentes, '20_ementa':ementa,
                    '21_indexacao':index})
    print(df)
    df.to_excel('teste1.xlsx', sheet_name='sheet1', index=False)
def produce_imbalance_evolution(simulation=simulation, year_min = 1996):
    # Recompute the generational-accounting imbalance (base and alternate
    # scenarios) for each of 60 successive base years, truncating the
    # population data as the window advances, and save the series to Excel.
    # NOTE: Python 2 only (print statements).
    arrays=arange(year_min, year_min+60)
    record = DataFrame(index=arrays)
    record['déséquilibre'] = NaN
    record['déséquilibre_alt'] = NaN
#     for year in range(year_min, year_min+60):
#         print year
#         # Try to truncate the df as time advances
#         try:
#             simulation.aggregate_pv = simulation.aggregate_pv.drop(labels=year-1, level='year')
#             simulation.aggregate_pv_alt = simulation.aggregate_pv_alt.drop(labels=year-1, level='year')
#         except:
#             print 'except path'
#             pass
#         simulation.aggregate_pv = AccountingCohorts(simulation.aggregate_pv)
#         simulation.aggregate_pv_alt = AccountingCohorts(simulation.aggregate_pv_alt)
#
#         ratio_base = simulation.compute_gen_imbalance(typ='net_transfers')
#         ratio_alt = simulation.compute_gen_imbalance(typ='net_transfers', default=False)
#         record.loc[year, "déséquilibre"] = ratio_base
#         record.loc[year, 'déséquilibre_alt'] = ratio_alt
#     print record.head(30).to_string()
#     record.to_excel(xls+'imbalance_flux.xlsx', 'flux de déséquilibre')
    for year in range(year_min, year_min+60):
        print year
        # Try to truncate the df as time advances (original comment in French).
        try:
            simulation.population = simulation.population.drop(labels=year-1, level='year')
            simulation.population_alt = simulation.population_alt.drop(labels=year-1, level='year')
        except:
            print 'except path'
            pass
        taxes_list = ['tva', 'tipp', 'cot', 'irpp', 'impot', 'property']
        payments_list = ['chomage', 'retraite', 'revsoc', 'maladie', 'educ']
        simulation.create_cohorts()
        simulation.create_cohorts(default=False)
        simulation.cohorts.compute_net_transfers(name = 'net_transfers', taxes_list = taxes_list, payments_list = payments_list)
        simulation.create_present_values('net_transfers', default=True)
        # Alternate scenario: cut pensions by 10% from 2027 on.
        simulation.cohorts_alt.loc[[x>=2027 for x in simulation.cohorts_alt.index.get_level_values(2)], 'retraite'] *= (1-0.1)
        simulation.cohorts_alt.compute_net_transfers(name = 'net_transfers', taxes_list = taxes_list, payments_list = payments_list)
        simulation.create_present_values('net_transfers', default=False)
        # imbalance = simulation.compute_gen_imbalance(typ='net_transfers')
        imbalance = simulation.compute_gen_imbalance(typ='net_transfers')
        imbalance_alt = simulation.compute_gen_imbalance(typ='net_transfers', default=False)
        record.loc[year, "déséquilibre"] = imbalance
        record.loc[year, 'déséquilibre_alt'] = imbalance_alt
    # NOTE(review): '\i' in the path is a literal backslash+i, not an
    # escape -- presumably a Windows path separator; confirm.
    record.to_excel(xls+'\imbalance_flux_agre.xlsx', 'flux de déséquilibre')
def test_float_types(self, engine, ext, np_type):
    """Values written as np.float* read back equal as floats."""
    frame = DataFrame(np.random.random_sample(10), dtype=np_type)
    frame.to_excel(self.path, "test1")
    recons = pd.read_excel(ExcelFile(self.path), "test1",
                           index_col=0).astype(np_type)
    tm.assert_frame_equal(frame, recons, check_dtype=False)
def study_GetBehILSStatus(SubjectsSummary,study,studydata,writer):
    """Query the iLS behavioral table for each subject and write a
    'BehavioralData' sheet to *writer*.

    Each cell is 1 if the measure is > 0, 0 if <= 0, and -9999 when the
    row is missing or the query fails.
    Returns (SubjectsSummary, df).
    """
    # for i in range(0,len(SubjectsSummary),1):
    #     print "%d %s"%(i,SubjectsSummary[i].Subject.subid)
    conn_qc = studydata.db_connection
    Measures = ['iLS6_propHT']
    full_list = []
    # create the column names for the output spreadsheet
    ColumnNames = []
    ColumnNames.append('subid')
    for meas in Measures:
        ColumnNames.append(meas)
    for j in range(0,len(SubjectsSummary),1):
        one_list = []
        one_list.append(SubjectsSummary[j].Subject.subid)
        # search through the Column Name list
        for index in range(1,len(ColumnNames),1):
            task = ColumnNames[index]
            sqlcommand = "SELECT subid"
            sqlcommand="%s,%s"%(sqlcommand,task)
            sqlcommand=sqlcommand+" FROM cnsdivdb.iLSBehav where subid='%s'"%(SubjectsSummary[j].Subject.subid)
            # print sqlcommand
            try:
                # ret is the number of rows returned
                ret = conn_qc.cur.execute(sqlcommand)
                # BUG FIX: the original used `ret is 0` / `ret is 1`,
                # comparing integers by identity; drivers may return
                # non-interned ints/longs, so compare by value.
                if ret == 0:
                    one_list.append(-9999)
                elif ret == 1:
                    row_dict = conn_qc.cur.fetchall()
                    value = row_dict[0][ColumnNames[index]]
                    if value > 0:
                        one_list.append(1)
                    else:
                        one_list.append(0)
                elif ret > 1:
                    MultipleRowFlag = True
                    for row_dict in conn_qc.cur.fetchall():
                        # value from first row
                        if MultipleRowFlag:
                            value = row_dict[ColumnNames[index]]
                            if value > 0:
                                one_list.append(1)
                                MultipleRowFlag = False
                    if MultipleRowFlag:
                        # if this flag is still true then there are multiple
                        # rows in the dB with no data in them
                        one_list.append(-9999)
            except Exception:
                # Narrowed from a bare `except:` so that e.g.
                # KeyboardInterrupt is no longer swallowed.
                one_list.append(-9999)
        full_list.append(one_list)
    df=DataFrame(full_list, columns=ColumnNames)
    df.to_excel(writer,sheet_name="BehavioralData",index=False)
    return SubjectsSummary,df
def test_swapped_columns(self, engine, ext):
    # Test for issue #5427.
    frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]})
    frame.to_excel(self.path, 'test1', columns=['B', 'A'])
    loaded = pd.read_excel(self.path, 'test1', header=0)
    for col in ('A', 'B'):
        tm.assert_series_equal(frame[col], loaded[col])
def test_excel_raise_error_on_multiindex_columns_and_no_index(
        self, ext):
    # MultiIndex as columns is not yet implemented 9794
    mi = MultiIndex.from_tuples([('site', ''),
                                 ('2014', 'height'),
                                 ('2014', 'weight')])
    frame = DataFrame(np.random.randn(10, 3), columns=mi)
    with pytest.raises(NotImplementedError):
        with ensure_clean(ext) as path:
            frame.to_excel(path, index=False)
def study_GetStatsStatus(SubjectsSummary,study,writer):
    """For each subject, check whether SPM first-level stats exist for
    every task and write a 'StatsData' sheet (1 = found, 0 = missing,
    plus an AllStatsProcessed TRUE/FALSE column).  Returns the frame.
    """
    spmVer = 'spm8'
    HeaderFlag = True
    # cycle over subjects; this will hold data for all subjects
    full_list = []
    for j in range(0,len(SubjectsSummary),1):
        Str = SubjectsSummary[j].Subject.subid
        # this will hold data for ONE subject
        one_list = []
        one_list.append(SubjectsSummary[j].Subject.subid)
        # create the column names ONCE
        # BUG FIX: HeaderFlag was never cleared, so the header was rebuilt
        # on every subject (contradicting the "ONCE" comment); also
        # `is True` identity checks replaced with plain truth tests.
        if HeaderFlag:
            ColumnNames = []
            ColumnNames.append('subid')
            for index in SubjectsSummary[0].TaskList:
                ColumnNames.append(SubjectsSummary[j].Scans[index]['SeriesName'])
            ColumnNames.append("AllStatsProcessed")
            HeaderFlag = False
        count = 0
        for index in SubjectsSummary[0].TaskList:
            task = SubjectsSummary[j].Scans[index]['SeriesName']
            # strip run numbers from the task name
            task=task.split('_r')[0]
            foundStatsFlag = False
            for visit in SubjectsSummary[j].Subject.visitlist:
                filePath=os.path.join(visit.path,'fmriStats',task,spmVer,'spmT_0001.img')
                if os.path.exists(filePath):
                    foundStatsFlag = True
            if foundStatsFlag:
                one_list.append(1)
                count = count + 1
            else:
                one_list.append(0)
        # append the one subject list to the full list
        # BUG FIX: was `count is len(...)` -- identity comparison of ints
        # is unreliable; compare by value.
        if count == len(SubjectsSummary[0].TaskList):
            one_list.append("TRUE")
        else:
            one_list.append("FALSE")
        full_list.append(one_list)
        print(Str)
    # now create the data frame for pandas
    df=DataFrame(full_list, columns=ColumnNames)
    # format it for Excel and write it to a file
    df.to_excel(writer,sheet_name="StatsData",index=False)
    # write out codes
    codes = []
    codes.append([0,'stats NOT done'])
    codes.append([1,'stats done'])
    CodeColNames = ['code','description']
    df_Notes=DataFrame(codes,columns=CodeColNames)
    # NOTE(review): this writes to the SAME sheet name as the data above;
    # depending on the writer this replaces the data frame -- confirm
    # whether a separate codes sheet was intended.
    df_Notes.to_excel(writer,sheet_name="StatsData",index=False)
    return df
def test_excel_roundtrip_bool(self):
    """Round-trip np.bool8 through xlsx (does not seem to work for xls)."""
    _skip_if_no_openpyxl()
    path = "__tmp_excel_roundtrip_bool__.xlsx"
    frame = DataFrame(np.random.randn(10, 2)) >= 0
    frame.to_excel(path, "test1")
    recons = ExcelFile(path).parse("test1")
    tm.assert_frame_equal(frame, recons)
    os.remove(path)
def test_float_types(self):
    """np.float16/32/64 values read back as equal floats (legacy API)."""
    _skip_if_no_xlrd()
    for dtype in (np.float16, np.float32, np.float64):
        with ensure_clean(self.ext) as path:
            original = DataFrame(np.random.random_sample(10), dtype=dtype)
            original.to_excel(path, 'test1')
            roundtripped = ExcelFile(path).parse('test1').astype(dtype)
            tm.assert_frame_equal(original, roundtripped, check_dtype=False)
class MusicBrainzArtistsBelgium(object): def __init__(self, update=False): self.update = update self.aantal_concerten_per_mbid = None set_useragent("kunstenpunt", "0.1", "github.com/kunstenpunt") self.lijst = None self.genres = {} def calculate_concerts_abroad(self): concerts = read_excel("./output/latest.xlsx") concerts_abroad_future = concerts[ -((concerts["land_clean"] == "Belgium") | (concerts["land_clean"].isnull())) & (concerts["datum"] >= datetime(2010, 1, 1))] self.aantal_concerten_per_mbid = concerts_abroad_future.groupby( ["artiest_mb_id"])["event_id"].count() def make_genre_mapping(self): for row in self.lijst.iterrows(): key = row[1]["mbid"] value = row[1]["maingenre"] self.genres[key] = value def load_list(self): self.lijst = read_excel("./resources/belgian_mscbrnz_artists.xlsx") # TODO make a list of musicbrainz ids that refer to the same platform url, e.g. toots thielemans and toots # TODO thielemans quartet both refer to the same songkick url; store these musicbrainz ids in the object so that # TODO the ids can be used to detect duplicate concerts @staticmethod def __get_land(artist): if "area" in artist: area = artist["area"] recurse = True while recurse: recurse = False areas = get_area_by_id( area["id"], includes="area-rels")["area"]["area-relation-list"] for test_area in areas: if "direction" in test_area and test_area[ "direction"] == "backward": area = test_area["area"] recurse = True return area else: return {} @staticmethod def __get_rel_url(artist, urltype, domain=None): urls = [] if "url-relation-list" in artist: for url in artist["url-relation-list"]: if url["type"] == urltype: if domain: if domain in url["target"]: urls.append(url["target"]) else: urls.append(url["target"]) return ",".join(urls) @staticmethod def __get_drop_url(artist, domain): urls = [] if "url-relation-list" in artist: for url in artist["url-relation-list"]: if domain in url["target"]: urls.append(url["target"]) return ",".join(urls) @staticmethod def 
__make_artist_name(artist_name): return sub(r"[^\w\s\d]", "", sub(r"[\[\(].+?[\)\]]", "", artist_name)).strip() @staticmethod def __get_parts_of(area_id): part_of_ids = [] areas = None while areas is None: try: sleep(1.0) areas = get_area_by_id( area_id, includes="area-rels")["area"]["area-relation-list"] except musicbrainz.NetworkError: sleep(25.0) except musicbrainz.ResponseError: sleep(25.0) for area in areas: if area["type"] == "part of" and "direction" not in area: part_of_ids.append((area["area"]["id"], area["area"]["name"])) return part_of_ids @staticmethod def __search_artists_in_area(area, limit, offset): artists = {"artist-list": [], "artist-count": -1} while artists["artist-count"] < 0: try: sleep(1.0) artists_area = search_artists(area=area, beginarea=area, endarea=area, limit=limit, offset=offset) artists['artist-list'] = artists_area["artist-list"] artists['artist-count'] = artists_area["artist-count"] except musicbrainz.NetworkError: sleep(25.0) except musicbrainz.ResponseError: sleep(25.0) return artists def __number_of_concerts(self, mbid): try: return self.aantal_concerten_per_mbid.loc[mbid] except KeyError: return 0 @staticmethod def __is_on_ignore_list(mbid): ignore_list = read_excel("./resources/ignore_list.xlsx") return mbid in ignore_list["mbid"] def _obtain_a_specific_mb_artist(self, mbid): artist = None while artist is None: try: sleep(1.0) artist = get_artist_by_id(mbid, includes=["url-rels"])["artist"] except musicbrainz.NetworkError as e: print("musicbrainz netwerkerror", e) sleep(25.0) except musicbrainz.Response: sleep(25.0) print("adding", artist["name"]) return self.mb_lijn(artist) def update_list(self): area_ids = [("5b8a5ee5-0bb3-34cf-9a75-c27c44e341fc", "Belgium")] new_parts = self.__get_parts_of(area_ids[0][0]) area_ids.extend(new_parts) while len(new_parts) > 0: new_new_parts = [] for new_part in new_parts: print("nieuwe locatie", new_part[1]) parts = self.__get_parts_of(new_part[0]) new_new_parts.extend(parts) 
area_ids.extend(parts) new_parts = new_new_parts belgium = [] for area_id in area_ids: print("finding artists in", area_id) offset = 0 limit = 100 total_search_results = 1 while offset < total_search_results: search_results = self.__search_artists_in_area( area_id[1], limit, offset) for hit in list(search_results["artist-list"]): if ("area" in hit and hit["area"]["id"] == area_id[0]) or ( "begin-area" in hit and hit["begin-area"]["id"] == area_id[0]) or ( "end-area" in hit and hit["end-area"]["id"] == area_id[0]): lijn = self._obtain_a_specific_mb_artist(hit["id"]) belgium.append(lijn) offset += limit total_search_results = search_results["artist-count"] for mbid in read_excel("./resources/grace_list.xlsx")["mbid"].values: lijn = self._obtain_a_specific_mb_artist(mbid) belgium.append(lijn) self.lijst = DataFrame(belgium).drop_duplicates(subset="mbid") self.lijst.to_excel("./resources/belgian_mscbrnz_artists.xlsx") def mb_lijn(self, hit): return { "band": hit["name"], "mbid": hit["id"], "area": hit["area"]["name"] if "area" in hit else None, "begin-area": hit["begin-area"]["name"] if "begin-area" in hit else None, "end-area": hit["end-area"]["name"] if "end-area" in hit else None, "begin": hit["life-span"]['begin'] if "life-span" in hit and "begin" in hit["life-span"] else None, "end": hit["life-span"]["end"] if "life-span" in hit and "end" in hit["life-span"] else None, "ended": hit["life-span"]["ended"] if "life-span" in hit and "ended" in hit["life-span"] else None, "disambiguation": hit["disambiguation"] if "disambiguation" in hit else None, "facebook": str(self.__get_rel_url(hit, "social network", "facebook.com")), "songkick": str(self.__get_rel_url(hit, "songkick")), "bandsintown": str(self.__get_rel_url(hit, "bandsintown")), "setlist": str(self.__get_rel_url(hit, "setlistfm")), "spotify": str(self.__get_drop_url(hit, "spotify")), "bandcamp": str(self.__get_drop_url(hit, "bandcamp")), "itunes": str(self.__get_drop_url(hit, "itunes")), "soundcloud": 
str(self.__get_drop_url(hit, "soundcloud")), "deezer": str(self.__get_drop_url(hit, "deezer")), "youtube": str(self.__get_drop_url(hit, "youtube")), "number_of_concerts": self.__number_of_concerts(hit["id"]), "on_ignore_list": self.__is_on_ignore_list(hit["id"]), "maingenre": self.genres[hit["id"]] if hit["id"] in self.genres else None }
def _save(self, data: pd.DataFrame) -> None:
    """Serialize *data* to the configured filepath via the configured
    Excel engine, forwarding the stored save arguments."""
    excel_writer = pd.ExcelWriter(self._filepath, engine=self._engine)
    data.to_excel(excel_writer, **self._save_args)
    excel_writer.save()
def write_excel(df: pd.DataFrame, path: str, **kwargs: Any) -> None:
    """Write *df* to an Excel file at *path*; extra keyword arguments are
    forwarded verbatim to :meth:`DataFrame.to_excel`."""
    df.to_excel(path, **kwargs)
def generate_gated_table(gated_df: pd.DataFrame, gated_path: str) -> None:
    """Generates table with gated population, i.e. same as the input file
    except for the gated cells.

    Writes *gated_df* to *gated_path* on a sheet named 'Gated Population'.
    """
    gated_df.to_excel(gated_path, sheet_name='Gated Population')
def get_one_result_allfiles(path):
    # Parse every training log under log/need_process/ and collect the
    # final rank-1/5/10 and mAP scores (plain and re-ranked) into
    # log/result.xlsx.  The expected log tail format is shown in the
    # sample string below.
    # data = pd.read_excel("result.xlsx", sheet_name='Sheet1')
    # data = {'name':['fly','yang'], 'age':[25,14]}
    files = glob('log/need_process/*')
    print(len(files))
    name = []
    epoc = []
    val_loss = []
    val_acc = []
    rank_1 = []
    rank_5 = []
    rank_10 = []
    map_ = []
    rerank_1 = []
    rerank_5 = []
    rerank_10 = []
    remap = []
    '''
    train Loss: 0.0018 Acc: 0.9946
    val Loss: 0.0138 Acc: 0.9174
    Training complete in 140m 59s
    Best val epoch: 84
    Best val Loss: 0.0139 Acc: 0.924101
    -------test-----------
    top1:0.918349 top5:0.971793 top10:0.985451 mAP:0.789647
    calculate initial distance
    Reranking complete in 1m 4s
    top1:0.931413 top5:0.965855 top10:0.977138 mAP:0.904607
    '''
    for file in files:
        print(file)
        # NOTE(review): the handle is never closed -- consider `with open(...)`.
        f = open(file, 'r')
        r = f.readlines()
        # print(file.split('/')[-2]+'/'+file.split('/')[-1])
        # Skip logs that are too short or whose last / 4th-from-last lines
        # are not result lines.
        if len(r) < 10:
            print('len(r) = %s' % len(r))
            continue
        if 'top1:' not in r[-4]:
            print('top1: not in r[-4]')
            continue
        if 'top1:' not in r[-1]:
            # NOTE(review): message says r[-4] but this checks r[-1] --
            # copy-paste leftover in the diagnostic only.
            print('top1: not in r[-4]')
            continue
        name.append(file.split('/')[-2] + '/' + file.split('/')[-1])
        # (Exploratory commented-out parsing of epoch / val loss removed
        # for brevity; the epoc/val_loss/val_acc lists above are unused.)
        # r[-4] is the plain result line: "top1:x top5:x top10:x mAP:x".
        if 'top1:' in r[-4]:
            rank_1.append(r[-4].split(':')[1].split('t')[0].strip())
            rank_5.append(r[-4].split(':')[2].split('t')[0].strip())
            rank_10.append(r[-4].split(':')[3].split('m')[0].strip())
            map_.append(r[-4].split(':')[4].strip())
        # r[-1] is the re-ranked result line with the same layout.
        if 'top1:' in r[-1]:
            rerank_1.append(r[-1].split(':')[1].split('t')[0].strip())
            rerank_5.append(r[-1].split(':')[2].split('t')[0].strip())
            rerank_10.append(r[-1].split(':')[3].split('m')[0].strip())
            remap.append(r[-1].split(':')[4].strip())
    # data = {'name': name, 'epoc': epoc, 'val_loss': val_loss, 'val_acc': val_acc, 'rank_1': rank_1, 'rank_5': rank_5,
    #         'rank_10': rank_10, 'map': map_, 'rerank_1': rerank_1, 'rerank_5': rerank_5, 'rerank_10': rerank_10,
    #         'remap': remap}
    data = {
        'name': name,
        'rank_1': rank_1,
        'rank_5': rank_5,
        'rank_10': rank_10,
        'map': map_,
        'rerank_1': rerank_1,
        'rerank_5': rerank_5,
        'rerank_10': rerank_10,
        'remap': remap
    }
    print(data)
    frame = DataFrame(data)
    print(frame)
    frame.to_excel('log/result.xlsx')
# Extract the item links from the page HTML, combine them with the
# previously scraped time / id / title / price lists, and append one
# record per item.  (Uses `res`, `day`, `date_times`, `ids`, `titles`,
# `prices` and `datas` defined earlier -- not visible in this span.)
links = re.findall('<p class="title">.*?<a href="(.*?)" title=.*?', res.text, re.S)
for date_time, id, title, price, link in zip(date_times, ids, titles, prices, links):
    # NOTE(review): `id` shadows the builtin; kept as-is here.
    data = {
        'day': day,
        # Broadcast slot is "begin-end"; split into the two halves.
        'b_time': date_time.strip().split('-')[0],
        'e_time': date_time.strip().split('-')[1],
        'id': id.strip().split(':')[1],
        'title': title.strip(),
        # Keep only the integer part of the yen/yuan price.
        'price': price.strip().split('¥')[1].split('.')[0],
        'link': link.strip()
    }
    #print(data)
    print(day, date_time.strip().split('-')[0], date_time.strip().split('-')[1],
          id.strip().split(':')[1], title.strip(),
          price.strip().split('¥')[1].split('.')[0], link.strip())
    datas.append(data)
# Be polite to the server between pages.
time.sleep(2)
df = DataFrame(datas)
df.to_excel(
    './hao24.xlsx',
    sheet_name='hao24',
    index=False,
    columns=['day', 'b_time', 'e_time', 'id', 'title', 'price', 'link'],
    encoding='utf-8')
    # (Continuation of the `data` dict literal opened above this span.)
    'sex': ['female', 'male', 'male', 'female', None]
}
df1 = DataFrame(data)
print(df1)
# Same data with explicit row labels and column order; fill the missing
# entries with per-column defaults.
df2 = DataFrame(data, index=['wxh', 'tsb', 'tzl', 'txy', 'txy'],
                columns=['Chinese', 'Math', 'English', 'sex'])
default_values = {'Chinese': 0, 'sex': 'unknown'}
df2.fillna(default_values, inplace=True)
print(df2)
# Excel round-trip demo (original comment: excel 表格).
print('-' * 35 + 'xlsx' + '-' * 35)
score = DataFrame(pd.read_excel('score.xlsx', index_col=0))
print(score)
df2.to_excel('data.xlsx')
# Drop a column, then a row (original comment: 删除行或列).
print('-' * 35 + '删除行或列' + '-' * 35)
df3 = df2.drop(columns='Math')
print(f'df2:\n{df2}')
# NOTE(review): the label below says df2 but prints df3 -- runtime string
# left untouched; confirm whether 'df3:' was intended.
print(f'df2:\n{df3}')
df4 = df3.drop(index='wxh')
print(f'df4:\n{df4}')
# Rename a column (original comment: 重命名列名).
print('-' * 35 + '重命名列名' + '-' * 35)
df3 = df2.rename(columns={'Chinese': '语文'})
print(f'df2:\n{df2}')
print(f'df3:\n{df3}')
# Column-wise extraction of the scraped exercise dicts (uses `exercises`,
# `names` and `targets` defined before this span), followed by export of
# the assembled table to muscle_and_strength.xlsx.
equipments = [ex['equipment'] for ex in exercises]
mechanics = [ex['mechanics'] for ex in exercises]
levels = [ex['level'] for ex in exercises]
types = [ex['exercise_type'] for ex in exercises]
forces = [ex['force'] for ex in exercises]
# NOTE(review): 'intrsuctions' is a typo for 'instructions' -- local name
# only, kept as-is.
intrsuctions = [ex['instructions'] for ex in exercises]
videos = [ex['video'] for ex in exercises]
secondaries = [ex['secondary'] for ex in exercises]
image_ones = [ex['image_1'] for ex in exercises]
image_twos = [ex['image_2'] for ex in exercises]
final_data = {
    'names': names,
    'target': targets,
    'type': types,
    'equipment': equipments,
    'mechanics': mechanics,
    'video': videos,
    'force': forces,
    'level': levels,
    'instructions': intrsuctions,
    'secondary': secondaries,
    'image_1': image_ones,
    'image_2': image_twos
}
df = DataFrame(final_data)
print(df)
df.to_excel('muscle_and_strength.xlsx', index=True, sheet_name='data')
from pandas import Series, DataFrame

# Daily OHLC quotes keyed by date (newest first); dict insertion order
# determines the row order of the resulting frame.
_quotes = {
    "2019-06-05": [112000, 112500, 109500, 110500],
    "2019-06-04": [114000, 114500, 110500, 111000],
    "2019-06-03": [113000, 115000, 112000, 115000],
    "2019-05-31": [111500, 112500, 110000, 111500],
    "2019-05-30": [111000, 114000, 109500, 112000],
}

columns = ["시가", "고가", "저가", "종가"]
index = list(_quotes.keys())
data = list(_quotes.values())

df = DataFrame(data=data, index=index, columns=columns)
# Sheet named after the ticker; the date index is intentionally not written.
df.to_excel("data.xlsx", sheet_name="035420", index=False)
import pandas as pd
from pandas import DataFrame

# Read the date column (second column) from the source workbook.
df = pd.read_excel(
    r'C:\Users\Amgh\PycharmProjects\eskandari\persian_date.xlsx',
    usecols=[1])
date = df.values.tolist()
print(date)

# Flatten the row-of-lists shape and drop NaN cells.
flat_date = [cell for row in date for cell in row]
cleanedList = [x for x in flat_date if str(x) != 'nan']

# Normalize "Y-M-D" into "Y/MM/DD" (zero-pad single-digit month/day).
final = []
for raw in cleanedList:
    parts = raw.split('-')
    if len(parts[2]) == 1:
        parts[2] = '0' + parts[2]
    if len(parts[1]) == 1:
        parts[1] = '0' + parts[1]
    final.append(parts[0] + '/' + parts[1] + '/' + parts[2])

df_final = DataFrame(final, columns=['date'])
df_final.to_excel("states.xlsx")
def df_to_xlsx_bytes(df: pd.DataFrame, byte_to_file_func=BytesIO):
    """Serialize *df* to an in-memory xlsx workbook and return its raw bytes.

    ``byte_to_file_func`` must produce a writable, seekable binary buffer
    (defaults to ``BytesIO``); the index is not written.
    """
    buffer = byte_to_file_func()
    df.to_excel(buffer, index=False)
    buffer.seek(0)
    return buffer.getvalue()
def writeExcel(self):
    """Export scores, the point table and user accounts to '<filename><date>.xlsx'.

    Reads the module-level ``pointTable``/``score``/``user`` models and
    writes one sheet per dataset ('Score', 'Point Table', 'Users').
    """
    PTdata = pointTable.query.all()
    scoredata = score.query.all()
    userdata = user.query.all()
    # Column-oriented buffers, one dict per sheet.
    sheet1data = {
        'Home': [], 'Away': [], 'Deadline': [], 'Score': [],
        'Division': [], 'Level': []
    }
    sheet2data = {
        'Player': [], 'Played': [], 'Win': [], 'Loss': [], 'Tie': [],
        'Bonus': [], 'Points': [], 'Xrating': [], 'Gamesplayed': [],
        'Gameswon': [], 'Set1Played': [], 'Set1Won': [], 'Set2Played': [],
        'Set2Won': [], 'Set3Played': [], 'Set3Won': []
    }
    sheet3data = {
        'Username': [], 'PlayerID': [], 'Email': [], 'Phone': [],
        'Password': []
    }
    for elem in scoredata:
        sheet1data['Home'].append(elem.player_id1)
        sheet1data['Away'].append(elem.player_id2)
        sheet1data['Deadline'].append(elem.deadline)
        sheet1data['Score'].append(elem.score)
        sheet1data['Division'].append(elem.division)
        sheet1data['Level'].append(elem.level)
    for elem in PTdata:
        sheet2data['Player'].append(elem.player_id)
        sheet2data['Played'].append(elem.played)
        sheet2data['Win'].append(elem.win)
        sheet2data['Loss'].append(elem.loss)
        sheet2data['Tie'].append(elem.tie)
        sheet2data['Bonus'].append(elem.bonus)
        # BUG FIX: 'Points' previously received elem.xrating and 'Xrating'
        # received elem.points (the two were swapped).
        sheet2data['Points'].append(elem.points)
        sheet2data['Xrating'].append(elem.xrating)
        sheet2data['Gamesplayed'].append(elem.gamesplayed)
        sheet2data['Gameswon'].append(elem.gameswon)
        sheet2data['Set1Played'].append(elem.set1played)
        sheet2data['Set1Won'].append(elem.set1won)
        sheet2data['Set2Played'].append(elem.set2played)
        sheet2data['Set2Won'].append(elem.set2won)
        sheet2data['Set3Played'].append(elem.set3played)
        sheet2data['Set3Won'].append(elem.set3won)
    for elem in userdata:
        sheet3data['Username'].append(elem.username)
        # NOTE(review): 'PlayerID' is filled from firstName — confirm intended.
        sheet3data['PlayerID'].append(elem.firstName)
        sheet3data['Email'].append(elem.email)
        sheet3data['Phone'].append(elem.phone)
        sheet3data['Password'].append(elem.password_hash)
    df = DataFrame(
        sheet1data,
        columns=['Home', 'Away', 'Deadline', 'Score', 'Division', 'Level'])
    df1 = DataFrame(sheet2data, columns=[
        'Player', 'Played', 'Win', 'Loss', 'Tie', 'Bonus', 'Points',
        'Xrating', 'Gamesplayed', 'Gameswon', 'Set1Played', 'Set1Won',
        'Set2Played', 'Set2Won', 'Set3Played', 'Set3Won'
    ])
    df2 = DataFrame(
        sheet3data,
        columns=['Username', 'PlayerID', 'Email', 'Phone', 'Password'])
    # Workbook is named after today's date, e.g. "2023-01-31.xlsx".
    EXCEL_NAME = str(datetime.now().date()) + '.xlsx'
    with ExcelWriter(self.filename + EXCEL_NAME) as writer:
        df.to_excel(writer, sheet_name='Score')
        df1.to_excel(writer, sheet_name='Point Table')
        df2.to_excel(writer, sheet_name='Users')
    print(str(datetime.now().date()))
# Reasonableness pass: strip non-ASCII characters from the name columns.
# (`df` is built earlier in the script.)
for _name_col in ('first_name', 'last_name'):
    df[_name_col].replace({r'[^\x00-\x7F]+': ''}, regex=True, inplace=True)

# Rename columns to the report-facing labels.
df.rename(columns={
    'weight': 'Weight(kgs)',
    'first_name': 'First_Name',
    'last_name': 'Last_Name'
}, inplace=True)

# Reorder columns, then drop the legacy 'No' column.
cols = [
    'No', 'First_Name', 'Last_Name', 'Age', 'Weight(kgs)', 'M1', 'M2', 'M3',
    'F1', 'F2', 'F3'
]
df = df.filter(cols, axis=1)
df.drop('No', axis=1, inplace=True)

# Rebuild a clean 0..n-1 index after the filtering above.
df.index = range(len(df))
print(df)
df.to_excel('data3.xlsx')
"Turn angle bins: ": turn_angle_bins[1:], "Turn angle densities: ": turn_angle_densties, 'Center distance densities bins: ': center_distance_bins[1:], 'Center distance densities: ': center_distance_densities, 'Interbout interval bins:': interbout_interval_bins[1:], 'Interbout interval densities:': interbout_interval_densities }) dframe.to_excel('{}_histograms.xlsx'.format(fish_name), sheet_name='histograms', index=False) dframe = DataFrame({ "Median forward swims: ": [np.nanmedian(np.abs(bout_path_changes))], "Median absolute turn angle: ": [np.nanmedian(np.abs(bout_angle_changes))], "Median distance to the center: ": [np.nanmedian(center_distance)], "Median Interbout interval: ": [np.nanmean(interbout_intervals)], }) dframe.to_excel('{}_medians.xlsx'.format(fish_name), sheet_name='medians', index=False) # make an excel file
# Load the corpus JSON and export every utterance's text variants to Excel.
# (`file_path` is defined earlier in the script.)
with open(file_path) as json_file:
    json_data = json.load(json_file)

# The corpus nests utterances under the first document entry.
document_array = json_data["document"]
utterance_array = document_array[0]["utterance"]

# One column per text variant, one row per utterance.
excelDataFrame = DataFrame({
    "form": [utterance["form"] for utterance in utterance_array],
    "original_form": [utterance["original_form"] for utterance in utterance_array],
    "note": [utterance["note"] for utterance in utterance_array]
})

excelDataFrame.to_excel("jsonToExcel.xlsx")
def _save_df(self, function: str, df: DataFrame, **kwargs) -> None:
    """Save Pandas DataFrame to a file type given a 'function'.

    Builds an export path from the parameters of the last successful API
    call, then writes *df* in the format selected by ``self.output``.
    """
    # Get the alias for the 'function' so filenames are short.
    short_function = self._function_alias(function)
    # Parameters of the last (successful) AV api call drive the filename.
    parameters = self.last()
    dt_now = datetime.now().strftime(Ymd_format)
    report_freq = kwargs.pop("report_freq", None)

    # Determine the export path, one naming scheme per API function family.
    if function == "CURRENCY_EXCHANGE_RATE":
        path = f"{self.export_path}/{parameters['from_currency']}{parameters['to_currency']}"
    elif function in [
            "FXD", "FXM", "FXW", "FX_DAILY", "FX_MONTHLY", "FX_WEEKLY"
    ]:
        path = f"{self.export_path}/{parameters['from_symbol']}{parameters['to_symbol']}_{short_function.replace('FX', '')}"
    elif function in ["FXI", "FX_INTRADAY"]:
        path = f"{self.export_path}/{parameters['from_symbol']}{parameters['to_symbol']}_{parameters['interval']}"
    elif function in [
            "CD", "CW", "CM", "DIGITAL_CURRENCY_DAILY",
            "DIGITAL_CURRENCY_WEEKLY", "DIGITAL_CURRENCY_MONTHLY"
    ]:
        path = f"{self.export_path}/{parameters['symbol']}{parameters['market']}_{short_function.replace('C', '')}"
    elif function == "TIME_SERIES_INTRADAY_EXTENDED":
        # Compress the slice name, e.g. "year1month2" -> "Y1M2".
        ie_slice = re_sub(r'month', "M", re_sub(r'year', "Y",
                                                parameters['slice']))
        ie_adjusted = "_ADJ" if parameters['adjusted'] == "true" else ""
        path = f"{self.export_path}/{parameters['symbol']}_{short_function}_{parameters['interval']}_{ie_slice}{ie_adjusted}"
    elif function == "OVERVIEW":
        path = f"{self.export_path}/{parameters['symbol']}"
    elif function == "SYMBOL_SEARCH":
        path = f"{self.export_path}/SEARCH_{parameters['keywords']}"
    elif function == "INCOME_STATEMENT":
        # NOTE(review): when report_freq is not a str, `path` stays unbound
        # here and the final `path += ...` would raise — confirm callers
        # always pass report_freq for the statement functions.
        if isinstance(report_freq, str):
            path = f"{self.export_path}/{parameters['symbol']}_IS_{report_freq}"
    elif function == "BALANCE_SHEET":
        if isinstance(report_freq, str):
            path = f"{self.export_path}/{parameters['symbol']}_BS_{report_freq}"
    elif function == "CASH_FLOW":
        if isinstance(report_freq, str):
            path = f"{self.export_path}/{parameters['symbol']}_CF_{report_freq}"
    elif function == "CRYPTO_RATING":
        path = f"{self.export_path}/{parameters['symbol']}_RATING"
    elif function == "TIME_SERIES_INTRADAY":
        i_adjusted = "_ADJ" if parameters['adjusted'] == "true" else ""
        path = f"{self.export_path}/{parameters['symbol']}_{parameters['interval']}{i_adjusted}"
    elif short_function.startswith("C") and len(short_function) == 2:
        path = f"{self.export_path}/{parameters['symbol']}{parameters['market']}"
    elif function in self.__api_indicator:
        # Technical indicators: symbol, interval initial, alias, then
        # optional series-type initial and time period.
        path = f"{self.export_path}/{parameters['symbol']}_{parameters['interval'][0].upper()}_{short_function}"
        if "series_type" in parameters:
            path += f"_{parameters['series_type'][0].upper()}"
        if "time_period" in parameters:
            path += f"_{parameters['time_period']}"
    elif function == "EARNINGS_CALENDAR":
        if "symbol" in parameters:
            path = f"{self.export_path}/EARNINGS_{parameters['symbol']}_{parameters['horizon'].upper()}_{dt_now}"
        else:
            path = f"{self.export_path}/EARNINGS_{parameters['horizon'].upper()}_{dt_now}"
    elif function == "IPO_CALENDAR":
        path = f"{self.export_path}/IPOS_{dt_now}"
    elif function == "LISTING_STATUS":
        # Delisted exports get a "DE" prefix.
        _state = "" if parameters["state"] == "active" else "DE"
        path = f"{self.export_path}/{_state}LISTED_{dt_now}"
        if "date" in parameters and parameters["date"] is not None:
            path += f"_FOR_{parameters['date']}"
    else:
        # Fallback: symbol plus the short alias.
        path = f"{self.export_path}/{parameters['symbol']}_{short_function}"
    path += f".{self.output}"

    # Export in the configured format.
    if self.output == "csv":
        df.to_csv(path)
    elif self.output == "json":
        df.to_json(path)
    elif self.output == "pkl":
        df.to_pickle(path)
    elif self.output == "html":
        df.to_html(path)
    elif self.output == "txt":
        Path(path).write_text(df.to_string())
    # NOTE(review): `excel` appears to be a module-level flag (excel engine
    # availability?) defined outside this excerpt — confirm.
    elif excel and self.output == "xlsx":
        df.to_excel(path, sheet_name=parameters["function"])
# Datasets from the benchmark file handle `f` (opened earlier in the script).
distances = f['distances']
neighbors = f['neighbors']
test = f['test']
train = f['train']

# Run 20 passes over the whole query set, recording per-query latency (ms)
# and the per-pass variance/mean.
variance_record = []
mean_record = []
for run in range(20):
    print(run)
    latencies_ms = []
    for query in test:
        started = time.time()
        idx = hnsw_n.search(query, 10)  # top-10 neighbours; result unused here
        latencies_ms.append((time.time() - started) * 1000)
    run_variance = np.var(latencies_ms)
    run_mean = np.mean(latencies_ms)
    pprint.pprint('variance: %f' % run_variance)
    pprint.pprint('mean: %f' % run_mean)
    variance_record.append(run_variance)
    mean_record.append(run_mean)

data = {'mean_balanced': mean_record, 'variance_balanced': variance_record}
df = DataFrame(data)
df.to_excel('variance_result_balanced_8.xlsx')
output_data = OutputData()
parameters = []
obj_functions = []
best_obj_fun = 1000000
num_of_rows_hm = 10
hm_pitch_adjusting_rate = 0.45

# Sweep the bandwidth parameter 0.05 .. 0.95 in steps of 0.10; average the
# objective value over 5 independent runs per setting.
for raw_value in range(5, 100, 10):
    parameter = float(raw_value) * 0.01
    vals = []
    for _ in range(5):
        result = harmony_search_algorithm(
            num_of_rows_hm=num_of_rows_hm,
            num_of_iterations=500000,
            hm_considering_rate=0.95,
            hm_pitch_adjusting_rate=hm_pitch_adjusting_rate,
            hm_bandwidth=parameter,
            opt_problem=optimization_problem,
            out_data=output_data)
        vals.append(result[0].val_of_object_fun)
    parameters.append(parameter)
    obj_functions.append(sum(vals) / len(vals))

df1 = DataFrame({'parameter': parameters, 'mean_of_obj_fun': obj_functions})
df1.to_excel(
    'C:/Users/Artur/PycharmProjects/Harmony Search Algorithm/data/tests/TSP/test_BW_parameter.xlsx',
    index=False)
print("execution time: " + str(time.time() - start))
def export_to_excel_file(data_frame: pd.DataFrame, file_name: str):
    """Write *data_frame* to *file_name* as an xlsx workbook (xlsxwriter engine).

    Uses ``ExcelWriter`` as a context manager: ``ExcelWriter.save()`` was
    deprecated and removed in pandas 2.0, and the ``with`` block also
    guarantees the file is closed if ``to_excel`` raises.
    """
    with pd.ExcelWriter(file_name, engine="xlsxwriter") as excel_writer:
        data_frame.to_excel(excel_writer)
def getdetails(vcenter, **kwargs):
    '''
    This function fetches host details and also VM inside each host.
    It operates on 3 modes:
    1. options=0 : fetches all Esxi hosts, all VM's under each Esxi host
    2. options=1 : fetches all Esxi hosts only
    3. options=2 : Fetches all details (VM's and Esxi) based on user input.
       Provide the list of Esxi hosts to fetch details. Use attribute
       "esxilist" to provide list of esxi host.
       Note: The name should exactly match with what present in vcenter

    Results are written to vcenter_details.xlsx: one 'Esxi_Details' sheet
    plus one per-host sheet of its VMs.
    '''
    datacenter = vcenter.content.rootFolder.childEntity
    Esxi_df = DataFrame(columns=[
        'Esxi_Name', 'Total_CPU_Cores', 'Logical_CPU_Core', 'CPU(in GHz)',
        'Total Memory(in Gb)', 'UCS_Model', 'UCS_Vendor', 'CIMC_Version',
        'Status', 'DataStores', 'Network(NIC)'
    ])
    # Maps host name -> DataFrame of its VMs (one sheet per host on export).
    esxi_vm_dict = {}
    if int(kwargs['options']) == 0:
        for dc in datacenter:
            hostFolder = dc.hostFolder.childEntity
            # Fetch details for each esxi, accumulating into Esxi_df.
            # NOTE(review): DataFrame.append is removed in pandas 2.x —
            # migrate to pd.concat when upgrading pandas.
            for esxihost in hostFolder:
                df2 = get_host_details(esxihost)
                Esxi_df = Esxi_df.append(df2, ignore_index=True)
            # Fetch VM details from each Esxi host.
            for esxihost in hostFolder:
                df3 = get_vm_details(esxihost)
                esxi_vm_dict[esxihost.name] = df3
    elif int(kwargs['options']) == 1:
        for dc in datacenter:
            hostFolder = dc.hostFolder.childEntity
            # Hosts only — no VM enumeration in this mode.
            for esxihost in hostFolder:
                df2 = get_host_details(esxihost)
                Esxi_df = Esxi_df.append(df2, ignore_index=True)
    elif int(kwargs['options']) == 2:
        # Restrict to the user-supplied list of esxi names.
        if 'esxilist' in kwargs:
            for dc in datacenter:
                hostFolder = dc.hostFolder.childEntity
                for esxihost in hostFolder:
                    if esxihost.name in kwargs['esxilist']:
                        df2 = get_host_details(esxihost)
                        Esxi_df = Esxi_df.append(df2, ignore_index=True)
                for esxihost in hostFolder:
                    if esxihost.name in kwargs['esxilist']:
                        df3 = get_vm_details(esxihost)
                        esxi_vm_dict[esxihost.name] = df3
        else:
            print(
                "Please provide list of esxi's using esxilist attribute of getdetails function"
            )
            # BUG FIX: was `sys.exit` (attribute reference, never called), so
            # execution fell through and wrote an empty workbook.
            sys.exit()
    with ExcelWriter('vcenter_details.xlsx') as writer:
        Esxi_df.to_excel(writer, sheet_name='Esxi_Details', index=False)
        # NOTE(review): host names longer than 31 chars exceed Excel's sheet
        # name limit — confirm against your vcenter naming.
        for key, value in esxi_vm_dict.items():
            value.to_excel(writer, sheet_name=key, index=False)
# NOTE(review): fragment — this sits inside a scraping loop defined above
# this excerpt (the `continue` below targets it); indentation reconstructed.
try:
    # Click the "show more reviews" button; any failure just skips the page.
    driver.find_element_by_xpath(
        '//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div'
    ).click()
    time.sleep(0.2)
# NOTE(review): bare except swallows every error (incl. KeyboardInterrupt) —
# consider narrowing to the selenium exception types.
except:
    continue
html = driver.page_source  # full rendered page markup
soup = BeautifulSoup(html, 'html.parser')
reviews = soup.find_all("div", attrs={"class": "single-review"})
print(i)
dt_result = DataFrame()
for review in reviews:
    # The star rating is embedded in the Korean aria-label; strip the
    # surrounding text so only the digit remains.
    star = review.find("div", attrs={
        "class": "tiny-star"
    }).get('aria-label').replace("별표 5개 만점에 ", "").replace("개로 평가했습니다.", "")
    review_text = review.find("div", attrs={
        "class": "review-body"
    }).text.replace("전체 리뷰", "")
    result = [star, review_text]
    # NOTE(review): DataFrame.append is removed in pandas 2.x — pd.concat
    # is the replacement when upgrading.
    dt_result = dt_result.append([result])
# One workbook per app id (`id` comes from the enclosing scope).
dt_result.to_excel('play_store_review_' + id + '.xlsx',
                   sheet_name='Sheet1',
                   engine='xlsxwriter')
# Series from a dict: keys become the index.
d = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
x3 = Series(d)
print(x3)

data = {
    'Chinese': [66, 95, 93, 90, 80],
    'English': [65, 85, 92, 88, 90],
    'Math': [30, 98, 96, 77, 90]
}
df1 = DataFrame(data)
# Same data with explicit row labels.
df2 = DataFrame(
    data, index=['ZhangFei', 'GuanYu', 'ZhaoYun', 'HuangZhong', 'DianWei'])
print(df1)
print(df2)
df2.to_excel('df2.xlsx')

df2 = df2.drop(index=['GuanYu', 'DianWei'])
print(df2)
df2.rename(columns={'Chinese': 'YuWen', 'English': 'Yingyu'}, inplace=True)
print(df2)
print(df2['YuWen'])
# BUG FIX: astype returns a new Series; the original discarded the result,
# so the column was never converted. Assign it back.
df2['YuWen'] = df2['YuWen'].astype('str')
# df2['YuWen'] = df2['YuWen'].astype(np.int64)
print(df2)
print(df2['YuWen'])


def plus(df, n, m):
    # Adds a derived column in place; `n` is unused here.
    # NOTE(review): this def closes the excerpt and may be truncated.
    df['new1'] = (df[u'YuWen'] + df[u'Yingyu']) * m
def main():
    # NOTE: legacy Python 2 code (bare `print` statements throughout).
    # Trains least-squares classifiers on an assignment workbook, evaluates
    # them, and writes results back into a copy of the template workbook.
    excelfile = "/Users/ChiYuChen/Intro to Machine Learning and Data Mining/Assignment 4/Assignment_4_Data_and_Template_Original.xlsx"
    sheetname = "Training Data"
    # Training features: rows 2..6601, first 15 columns (readExcel is a
    # project helper defined outside this excerpt).
    X = np.array(readExcel(excelfile,
                           sheetname=sheetname,
                           startrow=2,
                           endrow=6601,
                           endcol=15),
                 dtype=np.int32)
    # Prepend a bias column of ones.
    Xa = np.insert(X, 0, 1, axis=1)
    # Binary target (col 16) and 6-class label index (col 17).
    T = np.array(readExcel(excelfile,
                           sheetname=sheetname,
                           startrow=2,
                           endrow=6601,
                           startcol=16,
                           endcol=16),
                 dtype=np.int32)
    T2n = np.array(readExcel(excelfile,
                             sheetname=sheetname,
                             startrow=2,
                             endrow=6601,
                             startcol=17,
                             endcol=17),
                   dtype=np.int32)
    # One-vs-rest +/-1 encoding for the 6 classes.
    mapping = [
        [1, -1, -1, -1, -1, -1],
        [-1, 1, -1, -1, -1, -1],
        [-1, -1, 1, -1, -1, -1],
        [-1, -1, -1, 1, -1, -1],
        [-1, -1, -1, -1, 1, -1],
        [-1, -1, -1, -1, -1, 1]
    ]
    T2 = np.array([mapping[row[0]] for row in T2n], dtype=np.int32)
    # Least-squares weights via the Moore-Penrose pseudo-inverse.
    Xapi = np.linalg.pinv(Xa)
    W = np.dot(Xapi, T)
    W2 = np.dot(Xapi, T2)
    sheetname = "To be classified"
    Xt = np.array(readExcel(excelfile,
                            sheetname=sheetname,
                            startrow=5,
                            endrow=54,
                            endcol=15),
                  dtype=np.int32)
    Xta = np.insert(Xt, 0, 1, axis=1)
    # Binary prediction on the held-out sheet: sign of the linear score.
    T_testing = np.dot(Xta, W)
    T_testing = np.array([1 if T_testing[i][0] > 0 else -1 for i in range(len(T_testing))],
                         dtype=np.int32)
    # Multi-class prediction: argmax over the 6 one-vs-rest scores.
    T2_testing = np.dot(Xta, W2)
    T2_testing = np.argmax(T2_testing, axis=1)
    # Predictions on the training set itself, for the confusion matrices.
    T_validating = np.dot(Xa, W)
    T_validating = np.array([1 if T_validating[i][0] > 0 else -1 for i in range(len(T_validating))],
                            dtype=np.int32)
    T2_validating = np.dot(Xa, W2)
    T2_validating = np.argmax(T2_validating, axis=1)
    # 2x2 binary confusion matrix; rows = truth, cols = prediction.
    bccm = np.zeros((2, 2), dtype=np.int32)
    for i in range(len(T)):
        row = 1 if T[i, 0] == 1 else 0
        col = 1 if T_validating[i] == 1 else 0
        bccm[row, col] += 1
    print bccm
    # Derived rates: accuracy, then three ratios over bccm rows/columns.
    metrics = [
        np.divide(bccm[0, 0] + bccm[1, 1],
                  bccm[0, 0] + bccm[0, 1] + bccm[1, 0] + bccm[1, 1],
                  dtype=np.float64),
        np.divide(bccm[1, 1], bccm[1, 0] + bccm[1, 1], dtype=np.float64),
        np.divide(bccm[0, 0], bccm[0, 0] + bccm[0, 1], dtype=np.float64),
        np.divide(bccm[1, 1], bccm[0, 1] + bccm[1, 1], dtype=np.float64)
    ]
    print metrics
    # 6x6 multi-class confusion matrix; rows = truth, cols = prediction.
    mcccm = np.zeros((6, 6), dtype=np.int32)
    for i in range(len(T2n)):
        row = T2n[i, 0]
        col = T2_validating[i]
        mcccm[row, col] += 1
    print mcccm
    # Per-class positive predictive value: diagonal / column sums.
    sumup = np.sum(mcccm, axis=0)
    ppv = np.zeros(6)
    for i in range(6):
        ppv[i] = np.divide(mcccm[i, i], sumup[i], dtype=np.float64)
    print ppv
    # Best and worst class by PPV, with their indices.
    ppvmetrics = [[max(ppv), np.argmax(ppv)], [min(ppv), np.argmin(ppv)]]
    print ppvmetrics
    # Write each result block into the template workbook at fixed offsets.
    from pandas import DataFrame, ExcelWriter
    from openpyxl import load_workbook
    excelfile = "/Users/ChiYuChen/Intro to Machine Learning and Data Mining/Assignment 4/Assignment_4_Data_and_Template_Updated.xlsx"
    book = load_workbook(excelfile)
    writer = ExcelWriter(excelfile, engine='openpyxl')
    # NOTE(review): assigning writer.book / writer.sheets relies on old
    # pandas ExcelWriter internals (pre-1.4) — confirm pinned version.
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    sheetname = "Classifiers"
    df = DataFrame(W)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=4,
                startcol=0,
                header=False,
                index=False)
    df = DataFrame(W2)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=4,
                startcol=4,
                header=False,
                index=False)
    sheetname = "To be classified"
    df = DataFrame(T_testing)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=4,
                startcol=15,
                header=False,
                index=False)
    df = DataFrame(T2_testing)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=4,
                startcol=16,
                header=False,
                index=False)
    sheetname = "Performance"
    df = DataFrame(bccm)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=9,
                startcol=2,
                header=False,
                index=False)
    df = DataFrame(metrics)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=7,
                startcol=6,
                header=False,
                index=False)
    df = DataFrame(mcccm)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=18,
                startcol=2,
                header=False,
                index=False)
    df = DataFrame(ppvmetrics)
    df.to_excel(writer,
                sheet_name=sheetname,
                startrow=19,
                startcol=11,
                header=False,
                index=False)
    writer.save()
    writer.close()
def test_styler_to_excel(engine):
    """Round-trip styled DataFrames through to_excel and verify cell styles."""

    def style(df):
        # One CSS rule per targeted cell; everything else stays unstyled.
        # XXX: RGB colors not supported in xlwt
        return DataFrame(
            [
                ["font-weight: bold", "", ""],
                ["", "color: blue", ""],
                ["", "", "text-decoration: underline"],
                ["border-style: solid", "", ""],
                ["", "font-style: italic", ""],
                ["", "", "text-align: right"],
                ["background-color: red", "", ""],
                ["number-format: 0%", "", ""],
                ["", "", ""],
                ["", "", ""],
                ["", "", ""],
            ],
            index=df.index,
            columns=df.columns,
        )

    def assert_equal_style(cell1, cell2, engine):
        # Cells written with and without the Styler must carry equal style.
        if engine in ["xlsxwriter", "openpyxl"]:
            pytest.xfail(reason=("GH25351: failing on some attribute "
                                 "comparisons in {}".format(engine)))
        # XXX: should find a better way to check equality
        assert cell1.alignment.__dict__ == cell2.alignment.__dict__
        assert cell1.border.__dict__ == cell2.border.__dict__
        assert cell1.fill.__dict__ == cell2.fill.__dict__
        assert cell1.font.__dict__ == cell2.font.__dict__
        assert cell1.number_format == cell2.number_format
        assert cell1.protection.__dict__ == cell2.protection.__dict__

    def custom_converter(css):
        # use bold iff there is custom style attached to the cell
        if css.strip(" \n;"):
            return {"font": {"bold": True}}
        return {}

    pytest.importorskip("jinja2")
    pytest.importorskip(engine)

    # Prepare four sheets: plain, unstyled-Styler, styled, custom-converted.
    df = DataFrame(np.random.randn(11, 3))
    with ensure_clean(".xlsx" if engine != "xlwt" else ".xls") as path:
        writer = ExcelWriter(path, engine=engine)
        df.to_excel(writer, sheet_name="frame")
        df.style.to_excel(writer, sheet_name="unstyled")
        styled = df.style.apply(style, axis=None)
        styled.to_excel(writer, sheet_name="styled")
        ExcelFormatter(styled, style_converter=custom_converter).write(
            writer, sheet_name="custom")
        writer.save()

        if engine not in ("openpyxl", "xlsxwriter"):
            # For other engines, we only smoke test
            return
        openpyxl = pytest.importorskip("openpyxl")
        wb = openpyxl.load_workbook(path)

        # (1) compare DataFrame.to_excel and Styler.to_excel when unstyled
        n_cells = 0
        for col1, col2 in zip(wb["frame"].columns, wb["unstyled"].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                assert cell1.value == cell2.value
                assert_equal_style(cell1, cell2, engine)
                n_cells += 1
        # ensure iteration actually happened:
        assert n_cells == (11 + 1) * (3 + 1)

        # (2) check styling with default converter
        # XXX: openpyxl (as at 2.4) prefixes colors with 00, xlsxwriter with FF
        alpha = "00" if engine == "openpyxl" else "FF"
        n_cells = 0
        for col1, col2 in zip(wb["frame"].columns, wb["styled"].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                ref = "%s%d" % (cell2.column, cell2.row)
                # XXX: this isn't as strong a test as ideal; we should
                # confirm that differences are exclusive
                if ref == "B2":
                    assert not cell1.font.bold
                    assert cell2.font.bold
                elif ref == "C3":
                    assert cell1.font.color.rgb != cell2.font.color.rgb
                    assert cell2.font.color.rgb == alpha + "0000FF"
                elif ref == "D4":
                    assert cell1.font.underline != cell2.font.underline
                    assert cell2.font.underline == "single"
                elif ref == "B5":
                    assert not cell1.border.left.style
                    assert (cell2.border.top.style == cell2.border.right.style
                            == cell2.border.bottom.style ==
                            cell2.border.left.style == "medium")
                elif ref == "C6":
                    assert not cell1.font.italic
                    assert cell2.font.italic
                elif ref == "D7":
                    assert cell1.alignment.horizontal != cell2.alignment.horizontal
                    assert cell2.alignment.horizontal == "right"
                elif ref == "B8":
                    assert cell1.fill.fgColor.rgb != cell2.fill.fgColor.rgb
                    assert cell1.fill.patternType != cell2.fill.patternType
                    assert cell2.fill.fgColor.rgb == alpha + "FF0000"
                    assert cell2.fill.patternType == "solid"
                elif ref == "B9":
                    assert cell1.number_format == "General"
                    assert cell2.number_format == "0%"
                else:
                    assert_equal_style(cell1, cell2, engine)
                assert cell1.value == cell2.value
                n_cells += 1
        assert n_cells == (11 + 1) * (3 + 1)

        # (3) check styling with custom converter: every styled cell is bold.
        n_cells = 0
        for col1, col2 in zip(wb["frame"].columns, wb["custom"].columns):
            assert len(col1) == len(col2)
            for cell1, cell2 in zip(col1, col2):
                ref = "%s%d" % (cell2.column, cell2.row)
                if ref in ("B2", "C3", "D4", "B5", "C6", "D7", "B8", "B9"):
                    assert not cell1.font.bold
                    assert cell2.font.bold
                else:
                    assert_equal_style(cell1, cell2, engine)
                assert cell1.value == cell2.value
                n_cells += 1
        assert n_cells == (11 + 1) * (3 + 1)
# Flatten the per-source nested lists (built earlier in the script) into the
# four column accumulators.
for chunk in final_list_service:
    f_final_service.extend(chunk)
for chunk in final_list_wsdl:
    f_final_wsdl.extend(chunk)
for chunk in final_list_config:
    f_final_config.extend(chunk)
for chunk in final_list_datasource:
    f_final_datasource.extend(chunk)

# Export alongside the other spreadsheets.
os.chdir("C:\\Temp\\excel\\")
dataframe = DataFrame({
    'Service Name': f_final_service,
    'Dependent Service': f_final_wsdl,
    'JCA': f_final_config,
    'DataSource': f_final_datasource
})
dataframe.to_excel('test.xlsx', sheet_name='sheet1', index=False)
# NOTE(review): fragment — `fps`, `cycle_end`, `cycle_start`, `cap`,
# `total_sum`, `sum_ch1` and `file` come from the capture loop above this
# excerpt; the leading prints likely belong inside that loop.
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
print('[INFO] Each cycle time taken = %0.5fs'%(cycle_end-cycle_start))
print('----------------------------------------------------------------------')
cap.release()
cv2.destroyAllWindows()
total_sum.append(sum_ch1)
# Write dataframes and export to an Excel file.
# `check` becomes the longest run length; one column title per run.
check = 0
title = []
for j in range(len(total_sum)):
    if check < len(total_sum[j]):check = len(total_sum[j])
    title.append('Run %i '%(j+1)+str(file[j]))
index=np.arange(0,check,1)
# Zero-pad shorter runs so every column has `check` rows.
for k in range(len(total_sum)):
    if len(total_sum[k]) < check:
        for l in range(len(total_sum[k]),check):
            total_sum[k].append(0)
# Transpose run-major data into row-major records.
TTotal_sum = list(map(list, zip(*total_sum)))
df = DataFrame(data=TTotal_sum, columns = title)
# Ask the user where to save; extension is appended unconditionally.
savefile = asksaveasfilename(filetypes=(("Excel files", "*.xlsx"),("All files", "*.*") ))
df.to_excel(savefile+".xlsx", index=False, sheet_name="Results")
def get_one_result_onefile(path):
    """Parse training-log files matched by glob pattern *path* and collect
    validation accuracy plus rank-1/5/10 and mAP scores (before and after
    re-ranking) into 'log/result.xlsx'.

    Expected log tail per file (abridged)::

        Best val epoch: 84
        Best val Loss: 0.0139 Acc: 0.924101
        top1:0.918349 top5:0.971793 top10:0.985451 mAP:0.789647
        calculate initial distance
        Reranking complete in 1m 4s
        top1:0.931413 top5:0.965855 top10:0.977138 mAP:0.904607
    """
    files = glob(path)
    files = np.sort(files)
    print(len(files))
    epoc = []
    val_loss = []
    val_acc = []
    name = []
    rank_1 = []
    rank_5 = []
    rank_10 = []
    map_ = []
    rerank_1 = []
    rerank_5 = []
    rerank_10 = []
    remap = []
    for log_file in files:
        print(log_file)
        # BUG FIX: the handle was opened but never closed (leaked on every
        # iteration); a context manager closes it deterministically.
        with open(log_file, 'r') as fh:
            r = fh.readlines()
        if len(r) < 6:
            print('len(r) = %s' % len(r))
            continue
        # The 4th-from-last line must hold the pre-rerank scores…
        if 'top1:' not in r[-4]:
            print('top1: not in r[-4]')
            continue
        # …and the last line the post-rerank scores.
        if 'top1:' not in r[-1]:
            # BUG FIX: message previously said r[-4] for the r[-1] check.
            print('top1: not in r[-1]')
            continue
        re_flag = False
        ratio_flag = False
        for i in range(len(r)):
            if 'Best val epoch' in r[i]:
                epoc.append(r[i].split(':')[-1].strip())
                ratio_flag = True
            if 'Best val Acc' in r[i]:
                val_acc.append(r[i].split(':')[1].strip())
                ratio_flag = True
            if 'Best val Loss' in r[i]:
                val_loss.append(r[i].split(':')[1].strip())
                ratio_flag = True
            if 'ratio' in r[i] and ratio_flag:
                # Row label: file name plus the ratio rendered as a percent.
                name.append(log_file.split('/')[-1] + ' ' +
                            str(int(100 * float(r[i].split('=')[1].strip()))) +
                            'th feature')
            if 'top1:' in r[i]:
                if not re_flag:
                    # First score line of a pair: pre-rerank metrics.
                    rank_1.append(r[i].split(':')[1].split('t')[0].strip())
                    rank_5.append(r[i].split(':')[2].split('t')[0].strip())
                    rank_10.append(r[i].split(':')[3].split('m')[0].strip())
                    map_.append(r[i].split(':')[4].strip())
                    # A rerank section follows only if the next lines match.
                    if 'calculate' in r[i + 1] and 'top1' in r[i + 3]:
                        re_flag = True
                    else:
                        print('error!')
                        rerank_1.append('-')
                        rerank_5.append('-')
                        rerank_10.append('-')
                        remap.append('-')
                        continue
                else:
                    # Second score line of the pair: post-rerank metrics.
                    rerank_1.append(r[i].split(':')[1].split('t')[0].strip())
                    rerank_5.append(r[i].split(':')[2].split('t')[0].strip())
                    rerank_10.append(r[i].split(':')[3].split('m')[0].strip())
                    remap.append(r[i].split(':')[4].strip())
                    re_flag = False
    # Column lengths must agree or the DataFrame constructor raises.
    print('len(name) = %d' % len(name))
    print('len(rank_1) = %d' % len(rank_1))
    print('len(rank_5) = %d' % len(rank_5))
    print('len(rank_10) = %d' % len(rank_10))
    print('len(map_) = %d' % len(map_))
    print('len(rerank_1) = %d' % len(rerank_1))
    print('len(rerank_5) = %d' % len(rerank_5))
    print('len(rerank_10) = %d' % len(rerank_10))
    print('len(remap) = %d' % len(remap))
    data = {
        # 'best_epoc': epoc,
        'best_acc': val_acc,
        # 'best_loss': val_loss,
        'name': name,
        'rank_1': rank_1,
        'rank_5': rank_5,
        'rank_10': rank_10,
        'map': map_,
        'rerank_1': rerank_1,
        'rerank_5': rerank_5,
        'rerank_10': rerank_10,
        'remap': remap
    }
    print(data)
    frame = DataFrame(data)
    print(frame)
    frame.to_excel('log/result.xlsx')
# NOTE(review): fragment — this opens inside an if/elif chain over parsed CLI
# args; the matching `if args.<ip>:` branch header is above this excerpt and
# the indentation below is reconstructed.
    console.rule("[yellow]正在查询 %s 的情报信息" % ip, style="yellow")
    main(ip, config_path, proxies)
elif args.file:
    # Batch mode: one IP per line; lines without a '.' are skipped.
    with open(args.file) as f:
        f = f.readlines()
    ip_list = []
    for i in f:
        i = i.strip()
        if '.' in i:
            ip_list.append(i)
    num = 0
    ip_len = len(ip_list)
    for i in ip_list:
        num = num + 1
        console.rule("[yellow]正在查询 %s 的情报信息,剩余 %s 个IP" % (i, ip_len - num),
                     style="yellow")
        main(i, config_path, proxies)
        print()
else:
    # Neither a single IP nor a file was supplied.
    console.log('[yellow][INFO] 请输入待扫描的 IP 或 IP 列表文件')
    sys.exit()
# `pools` is the accumulator main() fills; export all findings.
df = DataFrame(pools,
               columns=[
                   'ip', 'IP是否存活', 'IP 可能开放端口', '是否为恶意IP', '危害程度', '威胁类型',
                   '标签', '标签类型', '场景', 'IP基本信息', 'IP地理位置', '情报可信度', '域名',
                   '注册人', '注册邮箱', '注册商', '注册时间', '到期时间'
               ])
df.to_excel(tig_output)
time.sleep(1)
console.log('[green][SUCC] 结果已保存至 %s' % tig_output)
def test_read_excel_multiindex_empty_level(self, ext):
    # see gh-12453
    with tm.ensure_clean(ext) as path:
        # An empty label in the *trailing* column's second level comes back
        # as an "Unnamed" placeholder after the round trip.
        df = DataFrame({
            ("One", "x"): {0: 1},
            ("Two", "X"): {0: 3},
            ("Two", "Y"): {0: 7},
            ("Zero", ""): {0: 0},
        })
        expected = DataFrame({
            ("One", "x"): {0: 1},
            ("Two", "X"): {0: 3},
            ("Two", "Y"): {0: 7},
            ("Zero", "Unnamed: 4_level_1"): {0: 0},
        })
        df.to_excel(path)
        actual = pd.read_excel(path, header=[0, 1], index_col=0)
        tm.assert_frame_equal(actual, expected)

        # Same round trip with the empty label in the *leading* column.
        df = pd.DataFrame({
            ("Beg", ""): {0: 0},
            ("Middle", "x"): {0: 1},
            ("Tail", "X"): {0: 3},
            ("Tail", "Y"): {0: 7},
        })
        expected = pd.DataFrame({
            ("Beg", "Unnamed: 1_level_1"): {0: 0},
            ("Middle", "x"): {0: 1},
            ("Tail", "X"): {0: 3},
            ("Tail", "Y"): {0: 7},
        })
        df.to_excel(path)
        actual = pd.read_excel(path, header=[0, 1], index_col=0)
        tm.assert_frame_equal(actual, expected)
def generate_summary_table(summary_df: pd.DataFrame,
                           summary_path: str) -> None:
    """Write the SCOUTS run summary to *summary_path* as an xlsx workbook.

    The single 'Summary' sheet lists each file generated by SCOUTS and how
    it was produced; the frame's index is not written.
    """
    summary_df.to_excel(summary_path, sheet_name='Summary', index=False)