def Copy(self):
    print(self.currentChoose)
    self.dataModel2 = DataFrameModel()
    self.tableView.setModel(self.dataModel2)
    # Resolve the chosen column names (indices are offset by the six leading columns)
    names2 = [self.df.columns[self.columnChooseIndex[tab] + 6]
              for tab in range(len(self.columnChooseIndex))]
    self.dataModel2.setDataFrame(DataFrame(self.df[names2]))
def parallelization(Dir, Par=10):
    Thr_list = [1, 2, 4, 8, 12]
    time_list = []
    for Thr in Thr_list:
        file = data.get_filename(Dir, Thr=Thr, Par=Par)
        df, time = data.initiate_df(file)
        time_list.append(float(time))
    # plt.close('all')
    plt.figure()
    # plt.title('Time elapsed per number of threads for 10 Particles and $2^{21}$ MC-cycles')
    plt.plot(Thr_list, time_list, 'o')
    plt.grid()
    plt.xlabel(r'Number of Threads')
    plt.ylabel(r'$t\ [s]$')
    plt.legend(['Time elapsed'])
    save_fig("time_per_thread.png")
    plt.show()
def analytical_vs_numerical(Dir, Importance=False, BB=False, NumDeriv=False,
                            Thr=8, Dim=3, Par=10, MC_Cycles=2097152):
    Dir += f"{Dim}D/"
    if not NumDeriv:
        Dir += "Analytical/"
    else:
        Dir += "Numerical/"
    filename = data.get_filename(Dir, Importance=Importance, BB=BB, NumDeriv=NumDeriv,
                                 Dim=Dim, Par=Par, MC_Cycles=MC_Cycles)
    # print(f"filename: {filename}")
    df, time = data.initiate_df(filename)
    return df, time
def parsing():
    global data_map, vns, vts, stack
    token = next_token()
    top = stack.peek()
    while top != '$':
        if top == token:
            print('matched ' + top)
            stack.pop()
            token = next_token()
        elif top == 'ε':
            stack.pop()
        elif DataFrame.is_vt(top):
            print('error, is_vt')
            print(stack)
            break
        elif data_map.loc[top][token] == '':
            print('error, empty parse-table entry')
            print(stack)
            break
        elif data_map.loc[top][token] in productions:
            print('output ' + data_map.loc[top][token])
            stack.pop()
            reverse_production_body_to_stack(data_map.loc[top][token])
        top = stack.peek()
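# For context, a hypothetical sketch of the reverse_production_body_to_stack
# helper called above (the real one is defined elsewhere in this project).
# It assumes each production is stored as a string such as 'E -> T E1' with
# space-separated symbols, and that Stack exposes a push() method.
def reverse_production_body_to_stack(production):
    body = production.split('->')[1].split()
    # Push the production body in reverse so the leftmost symbol ends up on
    # top of the stack, as in a standard LL(1) predictive parser.
    for symbol in reversed(body):
        stack.push(symbol)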
class DataFrameMc():
    """DataFrame class. Can be initialized from a dictionary such as
    {'coluna1': [1, 10], 'coluna2': ['valor1', 'valor2']}."""

    def __init__(self, dados=None):
        self.df = DataFrame()   # DataFrame from the .cpp extension
        self.colunas = {}       # Dictionary mapping columns to their types
        self.shape = [0, 0]     # Number of rows x columns
        self.indices = []       # Indexed columns
        # The code does not accept None values.
        if dados is not None:
            for coluna in dados:
                self.InserirColuna(dados[coluna], str(coluna))

    def InserirColuna(self, valores, nome_coluna):
        """Infers the data type. If mixed types are found, converts to string.
        Values must be passed inside a list."""
        if all(isinstance(x, int) for x in valores):
            if self.shape == [0, 0]:
                self.df.InserirColunaInt(valores, nome_coluna)
                self.colunas[nome_coluna] = 'int'
                self.shape[0] = len(valores)
                self.shape[1] += 1
            elif self.shape[0] != len(valores):
                raise Exception("Column has a different number of rows.")
            else:
                self.df.InserirColunaInt(valores, nome_coluna)
                self.colunas[nome_coluna] = 'int'
                self.shape[1] += 1
        elif all(isinstance(x, (float, int)) for x in valores):
            if self.shape == [0, 0]:
                self.df.InserirColunaDouble(valores, nome_coluna)
                self.colunas[nome_coluna] = 'double'
                self.shape[0] = len(valores)
                self.shape[1] += 1
            elif self.shape[0] != len(valores):
                raise Exception("Column has a different number of rows.")
            else:
                self.df.InserirColunaDouble(valores, nome_coluna)
                self.colunas[nome_coluna] = 'double'
                self.shape[1] += 1
        else:
            if not all(isinstance(x, str) for x in valores):
                valores = [str(i) for i in list(valores)]
            if self.shape == [0, 0]:
                self.df.InserirColunaString(valores, nome_coluna)
                self.colunas[nome_coluna] = 'string'
                self.shape[0] = len(valores)
                self.shape[1] += 1
            elif self.shape[0] != len(valores):
                raise Exception("Column has a different number of rows.")
            else:
                self.df.InserirColunaString(valores, nome_coluna)
                self.colunas[nome_coluna] = 'string'
                self.shape[1] += 1

    def GetColuna(self, nome_coluna):
        """Receives a column name and returns a list with that column's values."""
        if self.colunas[nome_coluna] == 'int':
            return self.df.GetColunaInt(nome_coluna)
        elif self.colunas[nome_coluna] == 'double':
            return self.df.GetColunaDouble(nome_coluna)
        elif self.colunas[nome_coluna] == 'string':
            return self.df.GetColunaString(nome_coluna)

    def RemoverColuna(self, nome_coluna):
        """Removes a column from the DataFrame."""
        if nome_coluna in self.indices:
            self.RemoverIndice(nome_coluna)
        if self.colunas[nome_coluna] == 'int':
            self.df.RemoverColunaInt([], nome_coluna)
        elif self.colunas[nome_coluna] == 'double':
            self.df.RemoverColunaDouble([], nome_coluna)
        elif self.colunas[nome_coluna] == 'string':
            self.df.RemoverColunaString([], nome_coluna)
        self.colunas.pop(nome_coluna)
        self.shape[1] -= 1

    def RemoverLinha(self, linhas):
        """Removes the given rows and rebuilds the indexes."""
        self.df.RemoverLinha(linhas)
        self.shape[0] -= len(linhas)
        for i in list(self.indices):
            self.IndexarColuna(i)

    def GetLoc(self, linha, nome_coluna):
        """Receives a list of rows and ONE column and returns the corresponding data."""
        linhas = []
        if type(linha) == int:
            linhas.append(linha)
        else:
            linhas = list(linha)
        if max(linhas) > self.shape[0] - 1:
            raise Exception("Given row index exceeds the DataFrame")
        if self.colunas[nome_coluna] == 'int':
            return self.df.GetLinhaInt(linhas, nome_coluna)
        elif self.colunas[nome_coluna] == 'double':
            return self.df.GetLinhaDouble(linhas, nome_coluna)
        elif self.colunas[nome_coluna] == 'string':
            return self.df.GetLinhaString(linhas, nome_coluna)

    def GetLinha(self, linha):
        """Receives a list of rows and returns their data."""
        resultado = {k: [] for k in self.colunas}
        if len(linha) > 0:
            for nome_coluna in self.colunas:
                resultado[nome_coluna].append(self.GetLoc(linha, nome_coluna))
            resultado = {k: v[0] for k, v in resultado.items()}
        return resultado

    def GetDados(self):
        """Returns the whole dataframe as a dictionary."""
        return self.GetLinha(range(0, self.shape[0]))

    def Slice(self, linhas, nome_colunas):
        """Receives a list of rows and a list of columns and returns the
        corresponding data as a dictionary."""
        if len(linhas) == 0:
            linhas = list(range(0, self.shape[0]))
        if len(nome_colunas) == 1:
            slc = self.GetLoc(linhas, nome_colunas[0])
        else:
            slc = {k: [] for k in nome_colunas}
            for k in slc:
                slc[k] = self.GetLoc(linhas, k)
        return slc

    def Show(self, dados):
        # Receives a dictionary and prints it in tabular form
        print(tabulate(dados, tablefmt='simple', headers='keys'))

    def InserirLinha(self, valores):
        """Appends rows given as a dictionary of column -> list of values."""
        num_linhas = [len(valores[i]) for i in valores][0]
        if len(set(len(valores[i]) for i in valores)) > 1:
            raise Exception("Columns with different numbers of rows")
        for nome_coluna in self.colunas:
            if self.colunas[nome_coluna] == 'int':
                self.df.InserirLinhaInt(valores[nome_coluna], nome_coluna)
            elif self.colunas[nome_coluna] == 'double':
                self.df.InserirLinhaDouble(valores[nome_coluna], nome_coluna)
            elif self.colunas[nome_coluna] == 'string':
                self.df.InserirLinhaString(valores[nome_coluna], nome_coluna)
        self.shape[0] += num_linhas

    def IndexarColuna(self, nome_coluna):
        """Receives a column name and indexes it."""
        if self.colunas[nome_coluna] == 'int':
            self.df.IndexarColunaInt([], nome_coluna)
        elif self.colunas[nome_coluna] == 'double':
            self.df.IndexarColunaDouble([], nome_coluna)
        elif self.colunas[nome_coluna] == 'string':
            self.df.IndexarColunaString([], nome_coluna)
        self.indices.append(nome_coluna)
        self.indices = list(sorted(set(self.indices)))

    def RemoverIndice(self, nome_coluna):
        """Removes an index."""
        if self.colunas[nome_coluna] == 'int':
            self.df.RemoverIndiceInt([], nome_coluna)
        elif self.colunas[nome_coluna] == 'double':
            self.df.RemoverIndiceDouble([], nome_coluna)
        elif self.colunas[nome_coluna] == 'string':
            self.df.RemoverIndiceString([], nome_coluna)
        self.indices.remove(nome_coluna)

    def Query_Tree(self, nome_coluna, operador, valor):
        """Helper for querying the binary tree index. Users should call Query instead."""
        if self.colunas[nome_coluna] == 'int':
            return self.df.QueryTreeInt([valor], nome_coluna, operador)
        elif self.colunas[nome_coluna] == 'double':
            return self.df.QueryTreeDouble([valor], nome_coluna, operador)
        elif self.colunas[nome_coluna] == 'string':
            return self.df.QueryTreeString([valor], nome_coluna, operador)

    def Query_Simples(self, nome_coluna, operador, valor):
        """Helper for querying without an index. Users should call Query instead."""
        if self.colunas[nome_coluna] == 'int':
            return self.df.QuerySimpleInt([valor], nome_coluna, operador)
        elif self.colunas[nome_coluna] == 'double':
            return self.df.QuerySimpleDouble([valor], nome_coluna, operador)
        elif self.colunas[nome_coluna] == 'string':
            return self.df.QuerySimpleString([valor], nome_coluna, operador)

    def Query(self, nome_coluna, operador, valor):
        # Queries the dataframe. Supported operators: '==', '<', '<=', '>', '>='
        if nome_coluna in self.indices:
            return self.Query_Tree(nome_coluna, operador, valor)
        else:
            return self.Query_Simples(nome_coluna, operador, valor)

    def QueryRect(self, queryrect, nome_coordenada1, nome_coordenada2):
        """Requires both coordinate columns to be of type double.
        queryrect = [xmin, ymin, xmax, ymax]"""
        if self.colunas[nome_coordenada1] != 'double' or self.colunas[nome_coordenada2] != 'double':
            raise Exception('Coordinate columns must be doubles')
        if nome_coordenada1 in self.indices and nome_coordenada2 in self.indices:
            if len(queryrect) != 4 or not all(isinstance(x, (float, int)) for x in queryrect):
                raise Exception('queryrect must be a list with 4 numeric values')
            return self.df.QueryRect(queryrect, nome_coordenada1, nome_coordenada2)
        else:
            raise Exception("Coordinate columns must be indexed")

    # Data visualization functions (plots)
    # The function below draws scatter and line plots.
    # The same arguments as matplotlib's pyplot can be used.
    def Plot(self, nome_coluna1, nome_coluna2, *args, **kwargs):
        x = self.GetColuna(nome_coluna1)
        y = self.GetColuna(nome_coluna2)
        plt.plot(x, y, *args, **kwargs)

    def Hist(self, nome_coluna1, *args, **kwargs):
        x = self.GetColuna(nome_coluna1)
        plt.hist(x, *args, **kwargs)

    def Hist2D(self, nome_coluna1, nome_coluna2, *args, **kwargs):
        x = self.GetColuna(nome_coluna1)
        y = self.GetColuna(nome_coluna2)
        plt.hist2d(x, y, *args, **kwargs)

    def BarPlot(self, coluna_altura, coluna_posicao, tipo='soma', *args, **kwargs):
        altura = self.GetColuna(coluna_altura)
        posicao = self.GetColuna(coluna_posicao)
        categorias = sorted(set(posicao))
        soma = {k: 0 for k in categorias}
        contador = {k: 0 for k in categorias}
        media = {k: 0 for k in categorias}
        for cat in categorias:
            for i in range(len(altura)):
                if posicao[i] == cat:
                    contador[cat] += 1
                    soma[cat] += altura[i]
        for cat in categorias:
            media[cat] = soma[cat] / contador[cat]
        if tipo == 'soma':
            plt.bar(soma.keys(), soma.values(), *args, **kwargs)
        if tipo == 'media':
            plt.bar(media.keys(), media.values(), *args, **kwargs)
        if tipo == 'contador':
            plt.bar(contador.keys(), contador.values(), *args, **kwargs)
29/10: all_values = test_data_list[0].split(',')
29/11: print(all_values[0])
29/12: n.query((numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01)
29/13: image_array = numpy.asfarray(all_values[1:]).reshape((28,28))
29/14: matplotlib.pyplot.imshow(image_array, cmap='Greys', interpolation='None')
30/1: import numpy as np
30/2: normal_list = range(1000)
30/3: %timeit [i**2 for i in normal_list]
31/1: path = './usagov_bitly_data2013-05-17-1368828605.txt'
31/2: import json
31/3: records = [json.loads(line) for line in open(path)]
31/4: records
31/5: from pandas import DataFrame, Series
31/6: import pandas as pd
31/7: import numpy as np
31/8: frame = DataFrame(records)
31/9: frame
31/10: frame['tz']
31/11: frame['tz'][:10]
31/12: import pylab
31/13: import _tkinter
31/14: import tkinter
31/15: import Tkinter
31/16: :version
31/17: help
31/18: help()
31/19: tz_counts = frame['tz'].value_counts()
31/20: tz_counts
31/21: clean_tz = frame['tz
31/22: clean_tz = frame['tz'].fillna('Missing')
31/23: clean_tz[clean_tz == ''] = 'Unknown'
# Steplength (BF) and timestep (IMP) both set to 0.1
Dir += "Parameters/MC-Cycles/"
Par = 100; MC_Cycles = 1000  # Just an arbitrary number for this case only

file_BF = data.get_filename(Dir, Importance=False, Thr=1, Dim=3, Par=Par, MC_Cycles=MC_Cycles)
file_IMP = data.get_filename(Dir, Importance=True, Thr=1, Dim=3, Par=Par, MC_Cycles=MC_Cycles)
df_BF, time_BF = data.initiate_df(file_BF)
df_IMP, time_IMP = data.initiate_df(file_IMP)

plt.close("all")
f.parameter_analysis_(df_BF, MC_list)
f.parameter_analysis_(df_IMP, MC_list)

# Parameters Analysis
"""
"""

# Parameters Analysis
# Testing how the number of MC-cycles affects the results for the bootstrapping and blocking variance,
# for both the brute-force (BF) method and the importance (IMP) method respectively
Dir += "Parameters/MC-Cycles/"
Par = 100; MC_Cycles = 1000  # Just an arbitrary number for this case only

# Analytical case
# file_BF = data.get_filename(Dir, Importance=False, BB=True, Thr=8, Dim=3, Par=Par, MC_Cycles=MC_Cycles)
file_IMP = data.get_filename(Dir, Importance=True, BB=True, Thr=8, Dim=3, Par=Par, MC_Cycles=MC_Cycles)
# df_BF, time_BF = data.initiate_df(file_BF)
df_IMP, time_IMP = data.initiate_df(file_IMP)

# data.df_sequential_alpha_sort(df_BF, MC_list)
data.df_sequential_alpha_sort(df_IMP, MC_list)
def main():
    # Get dates to call the API
    today = datetime.datetime.now()
    month_before = today + datetime.timedelta(days=-30)

    # Set some quota/threshold - $ and kWh
    # Note: quota/money is a float and bill is a $ amount (str)
    monthly_quota = 2.0
    monthly_quota_bill = locale.currency(monthly_quota, grouping=True)
    monthly_quota_kwh = cal.calculate_kwh_from_bill(monthly_quota)
    day_quota = monthly_quota / 30
    day_quota_bill = locale.currency(day_quota, grouping=True)
    # day_quota_kwh = cal.calculate_kwh_from_bill(day_quota)  # not needed at the time

    api = api_call.API()
    logged_in = api.login()
    print("Logged into API")
    if logged_in[0]:
        # Successful login
        print(logged_in[1])

        # Collect data to populate the data frame
        collecting_data = api.get_data(cal.api_time(str(month_before)), cal.api_time(str(today)))
        print("Collected Data")
        if collecting_data[0]:
            # If we're able to collect data, do the following...
            # 1. Clean data
            [avg_power, cleaned_data] = cal.clean_month_data(collecting_data[1], monthly_quota, monthly_quota_kwh)
            avg_power = avg_power / 1000.0
            quota_hours = monthly_quota_kwh / avg_power
            cleaned_data["Remaining Hours"] = [(quota_hours - (kwh / avg_power))
                                               for kwh in cleaned_data["Current KWH Total"]]
            print("Cleaned Data")

            # 2. Create DataFrame with new data
            data_frame = df.DF(cleaned_data)
            print("Created DataFrame")

            # 3. Get recent/latest update and average energy
            latest_data = data_frame.get_latest(1)
            current_kwh_total = float(latest_data['Current KWH Total'])
            current_day_kwh = float(latest_data['Energy'])
            print("Got Latest Update")

            # 4. Create graphs -- optional
            # cal.create_graph('Current Bill', "Current Quota Bill Difference", monthly_quota, data_frame)
            # cal.create_graph('Current KWH Total', "Current Quota KWH Difference", monthly_quota_kwh, data_frame)

            # 5. Check if monthly and daily quotas are/were met
            [current_day_money, current_day_bill] = cal.calculate_bill(current_day_kwh)
            day_update = um.create_update(latest_data, day_quota, day_quota_bill,
                                          current_day_money, current_day_bill, 'daily')
            day_message = um.generate_message(str(today), current_day_money, current_day_bill,
                                              day_quota, "daily")
            [current_month_money, current_month_bill] = cal.calculate_bill(current_kwh_total)
            month_update = um.create_update(latest_data, monthly_quota, monthly_quota_bill,
                                            current_month_money, current_month_bill, 'monthly')
            month_message = um.generate_message(str(today), current_month_money, current_month_bill,
                                                monthly_quota, "monthly")
            print("Checked Quotas")
            print(data_frame.to_string())

            output = {"All Good": True,
                      "Status_Report_Month": {
                          "Month_Update": month_update,
                          "Month_Message": month_message
                      },
                      "Status_Report_Day": {
                          "Day_Update": day_update,
                          "Day_Message": day_message
                      }}
            print("Output Created")
            print(output["Status_Report_Month"])
            print(output["Status_Report_Day"])
            return json.dumps(output)
        else:
            # Unable to request data
            output = {"All Good": False}
            return json.dumps(output)
    else:
        # Unsuccessful login
        return logged_in[1]
# Shuffle training data
shuffle_indices = np.random.permutation(np.arange(len(y_train)))
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]

# Minibatch training
for i in range(0, len(y_train) // batch_size):
    start = i * batch_size
    batch_x = X_train[start:start + batch_size]
    batch_y = y_train[start:start + batch_size]
    # Run optimizer with batch
    net.run(opt, feed_dict={X: batch_x, Y: batch_y})

    # Show progress
    if np.mod(i, 10) == 0:
        # MSE train and test
        mse_train.append(net.run(mse, feed_dict={X: X_train, Y: y_train}))
        mse_test.append(net.run(mse, feed_dict={X: X_test, Y: y_test}))
        print('MSE Train: ', mse_train[-1])
        print('MSE Test: ', mse_test[-1])

        # Prediction
        pred = net.run(out, feed_dict={X: X_test})
        line2.set_ydata(pred)
        plt.title('Epoch ' + str(e) + ', Batch ' + str(i))
        plt.pause(0.01)

from pandas import DataFrame
pred = DataFrame(pred)
pred.to_csv('/Users/zozozoe/Desktop/daxpart4_pred.csv')
conn = engine.connect()
# Required for querying tables
metadata = MetaData(conn)
# Table to query
tbl = Table(TableName, metadata, autoload=True, schema="dbo")
#tbl.create(checkfirst=True)
# Select all
sql = tbl.select()
# run sql code
result = conn.execute(sql)
# Insert to a dataframe
df = DataFrame(data=list(result), columns=result.keys())
# Close connection
conn.close()

ru = DataFrame(df.REFERRAL_CODE)
ru.head()
ruList = []
for x in ru.REFERRAL_CODE.unique():
    ruList.append(x)
end program.

begin program.
for item in ruList:
    print(item)
end program.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import DataFrame
from Stack import Stack

productions = DataFrame.productions()
data_map = DataFrame.data_frame()
vns = DataFrame.vns_from_file()
vts = DataFrame.vts_from_file()
stack = Stack()
tokens = []


def init_tokens():
    """Initialize tokens from the file that stores the lexical analysis results."""
    global tokens
    with open("./result.txt", 'r') as f:
        buffer = f.read().split('\n')
        for i in range(len(buffer) - 1):
            if buffer[i].split(',')[1][1:-1] == '':
                tokens.append(',')
            else:
                tokens.append(buffer[i].split(',')[1][1:-1])
    tokens.append('$')


def next_token():
    """Read the next token."""
#
# <codecell>

#!python
import DataFrame

# <markdowncell>

# and read the file in using our desired CSV dialect:
#
# <codecell>

#!python
df = DataFrame.read_csv("CSVSample.csv", dialect=DataFrame.access2000)

# <markdowncell>

# (note that the dialect is actually defined in the DataFrame class). It
# is often useful to filter the data according to some criterion.
#
# Compatibility with Python 2.6 and above
# ---------------------------------------
#
# Starting with Python 2.6, the sets module is deprecated; to get
# rid of the warning, replace
#
# <codecell>
from DataFrame import *

x = DataFrame("example.csv")
y = x.copy()
z = x.copy()
d = x.copy()
Z = x.copy()
x.add(y)
y.sub(x)
x.transform("x + 1")
y.showDF()
z.add(d)
y.add(x)
x.add(Z)
x.showDF()
y.add(x)
x.add(y)
y.dropNA(0)
y.add(x)
x.add(y)
y.add(x)
x.add(y)
from pandas import DataFrame
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

my_data = {'X1': [0, 0, 10, 10, 20, 20],
           'X2': [0, 0, 100, 100, 400, 400],
           'Y1': [5, 7, 15, 17, 9, 11]}
df = DataFrame(my_data, columns=['X1', 'X2', 'Y1'])
print(df)

plt.scatter(df['X1'], df['Y1'], color='red')
plt.title('X1 Vs Y1', fontsize=14)
plt.xlabel('X1', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

plt.scatter(df['X2'], df['Y1'], color='green')
plt.title('X2 Vs Y1', fontsize=14)
plt.xlabel('X2', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

X = df[['X1', 'X2']]
Y = df['Y1']
reg = linear_model.LinearRegression()
reg.fit(X, Y)
print('Intercept: \n', reg.intercept_)
from pandas import DataFrame
from sklearn import linear_model
import matplotlib.pyplot as plt
import statsmodels.api as sm

my_data = {'P': [30, 10, 60, 40, 20, 40, 50, 30],
           'L': [10, 0, 45, 65, 70, 30, 40, 20],
           'Y1': [15, 8, 23, 53, 55, 37, 29, 16]}
df = DataFrame(my_data, columns=['P', 'L', 'Y1'])
print(df)

plt.scatter(df['P'], df['Y1'], color='red')
plt.title('P Vs Y1', fontsize=14)
plt.xlabel('P', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

plt.scatter(df['L'], df['Y1'], color='green')
plt.title('L Vs Y1', fontsize=14)
plt.xlabel('L', fontsize=14)
plt.ylabel('Y1', fontsize=14)
plt.grid(True)
plt.show()

X = df[['P', 'L']]
Y = df['Y1']
reg = linear_model.LinearRegression()
reg.fit(X, Y)
print('Intercept: \n', reg.intercept_)
a    1
b    2
c    3
d    4
e    5
dtype: int64

In [32]: s2.index.name = 'index'

In [33]: s2.index
Out[33]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='index')

The code above creates two Series, s1 and s2, one after the other: the first uses the default integer index, while the second uses the character index we specified; the index itself can also be given a name.

A DataFrame is similar to a two-dimensional array, with both rows and columns. Besides having a row index like a Series, each column also carries a label; both the index and the labels can themselves be named:

In [73]: df = DataFrame(np.random.randn(4, 4), index=['a', 'b', 'c', 'd'], columns=['A', 'B', 'C', 'D'])

In [74]: df
Out[74]:
          A         B         C         D
a -0.112607  0.563528 -0.314797 -1.972133
b -1.378539 -0.939139  0.757630 -0.307336
c  0.866185 -2.155719 -1.485602 -0.344602
d -0.253973 -1.753680 -0.062741  0.911882

In [75]: df.index
Out[75]: Index(['a', 'b', 'c', 'd'], dtype='object')

In [76]: df.columns
Out[76]: Index(['A', 'B', 'C', 'D'], dtype='object')

In the code above, a DataFrame instance is created by specifying the index and the labels (the columns parameter). The index and the labels can be accessed through df.index and df.columns respectively.
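As a small follow-up example (not from the original session), the labels shown above can also be used directly to select data; this is standard pandas label-based indexing:

import numpy as np
from pandas import DataFrame

df = DataFrame(np.random.randn(4, 4), index=['a', 'b', 'c', 'd'],
               columns=['A', 'B', 'C', 'D'])
col_a = df['A']          # one column, returned as a Series labelled by the index
row_b = df.loc['b']      # one row, selected by its index label
cell = df.loc['b', 'C']  # a single element, by row label and column label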
def main():
    formats = ['tif', 'img']

    parser = ArgumentParser()
    parser.add_argument("-i", "--in", dest="inws", help="Input workspace")
    parser.add_argument("-o", "--out", dest="outws", help="Output workspace")
    parser.add_argument("-p", "--parameters", dest="parameters",
                        help="Path to parameters csv file")
    parser.add_argument("-l", "--link-field", dest="link_field",
                        help="Link field in the parameters file")
    parser.add_argument("-f", "--format", dest="format", default="tif",
                        help="file format for FILENAME")
    parser.add_argument("-t", "--template", dest="template",
                        default="<BODY1>_<ID1>_<BODY2>_<ID2>_<BODY3>",
                        help="Template for file names in input workspace")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
    args = parser.parse_args()

    if not args.inws:
        parser.error("Path to input workspace must be provided")
    else:
        inws = os.path.abspath(args.inws)
        if not os.path.exists(inws):
            parser.error("Input workspace {0} does not exist".format(inws))

    if not args.outws:
        parser.error("Path to output workspace must be provided")
    else:
        outws = os.path.abspath(args.outws)
        if not os.path.exists(args.outws):
            parser.error("Output workspace {0} does not exist".format(outws))

    if not args.parameters:
        parser.error("Path to parameters CSV file must be provided")
    else:
        parameters = os.path.abspath(args.parameters)
        if not os.path.exists(parameters):
            parser.error("Parameters file {0} does not exist".format(parameters))

    if args.format not in formats:
        parser.error("Provided format must be one of: %s" % ', '.join(formats))

    # List the rasters found in the input workspace
    inrasters = list_rasters(inws, [args.format], sorted=True)

    # Read the parameters file in as a DataFrame
    parameters_df = DataFrame.read_csv(args.parameters, dialect=DataFrame.ZCustom)
    # Get all the field names from the generated DataFrame
    fields = parameters_df.get_fields()

    if not args.link_field:
        parser.error("No link field provided, available fields "
                     "are: \n" + '\n'.join(fields))
    elif args.link_field not in fields:
        parser.error("Link field provided not found, available fields "
                     "are: \n" + '\n'.join(fields))
    else:
        link_field = args.link_field

    if args.verbose:
        print("\n")
        print("STARTING " + "*" * 70)
        print("Input workspace: {0}".format(inws))
        print("Output workspace: {0}".format(outws))
        print("Parameters file: {0}".format(parameters))
        print("Format: {0}".format(args.format))
        print("File name template: {0}".format(args.template))
        if len(inrasters) > 0:
            print("Following {0} rasters found ".format(len(inrasters) / 2) +
                  "in the input workspace:")
            for raster in inrasters:
                print("\t" + os.path.basename(raster))
        else:
            print("Could not find any rasters with format <" +
                  "{0}> in input workspace".format(args.format))
        print("\n")

    # Construct ParsedFileNames from the input workspace based on a template
    inrasters = [ParsedFileName(raster, args.template) for raster in inrasters]

    process_sigmoidal(inrasters, parameters_df, link_field, outws, multiply=True)
arrOverPowerTime = []
arrOverVoltageTime = []
arrOverAmperageTime = []
arrTimeMeanTime = []

dfOverPowerTime = []
dfOverVoltageTime = []
dfOverAmperageTime = []
dfTimeMeanTime = []

Pandas = [arrOverPowerTime, arrOverVoltageTime, arrOverAmperageTime, arrTimeMeanTime]
Numpy = [dfOverPowerTime, dfOverVoltageTime, dfOverAmperageTime, dfTimeMeanTime]

for i in range(1, 7):
    df = DataFrame.dfCreate(pow(10, i))
    arr = Array.arrCreate(df)
    for j in range(1, 5):
        timea = getTime("a", "{}".format(j), arr, df)
        print(timea)
        Numpy[j - 1].append(timea)
        timed = getTime("d", "{}".format(j), arr, df)
        Pandas[j - 1].append(timed)

count = ["10", "100", "1000", "10000", "100000", "1000000"]
overPowerTable = pd.DataFrame({"Array": Numpy[0], "DataFrame": Pandas[0],
                               "Count": count}).set_index("Count")
overVoltageTable = pd.DataFrame({"Array": Numpy[1], "DataFrame": Pandas[1],
                                 "Count": count}).set_index("Count")
if __name__ == '__main__':
    # set the working directory
    os.chdir(os.path.dirname(__file__))
    datadir = r'C:\Data\Staging\Tests\input'
    outputdir = r'C:\Data\Staging\Tests\output'

    # read in the parameters
    # ESMK
    pfile = os.path.join('..', 'R', 'parameters_new.csv')
    # SuperMetso
    #pfile = r"H:/Data/SuperMetso/MSNFI_params.csv"
    params = DataFrame.read_csv(pfile, dialect=DataFrame.ZCustom)

    # Define the fields that link the CSV file to the raster name template
    idfield = "IPUULAJI"
    # ComplexName template
    ID1 = 'puulaji'
    ID2 = 'osite'
    # ESMK
    template = '<BODY1>_<ID1>_<BODY2>_<ID2>_<BODY3>'
    #template = "<BODY1>_<ID1>_<BODY2>"

    raw_rasters = [ParsedFileName(raster, template)
                   for raster in list_rasters(datadir, ['img'], sorted=True)]
def huanjings(html):
    huanjing_span = soup.find_all('span', class_="rst")
    huanjings = []
    for huanjing in huanjing_span:
        if "环境" in huanjing.text:
            print(huanjing.text)
            huanjings.append(huanjing.text)
    return huanjings


# print(len(stars(html)))
print(len(huanjings(html)))

df = DataFrame({
    'ID名字': names(html),
    '星级': stars(html),
    '口味': kouweis(html),
    '环境': huanjings(html),
    '服务': fuwus(html),
    '点评内容': pls(html)
})
df.to_csv("E:/python_code/self_learn/df.csv", index=False, encoding='utf_8_sig')
# df.to_excel("E:/python_code/self_learn/df.xls", sheet_name='Sheet1')
writer = pd.ExcelWriter('output.xlsx')
df.to_excel(writer, 'Sheet1')
writer.save()