def make_excel(file_name, sheet, desc="", df=None):
    """Save *df* (or an empty DataFrame) to *file_name* under worksheet *sheet*.

    Parameters
    ----------
    file_name : str
        Target .xlsx path.
    sheet : str
        Worksheet name.
    desc : str
        Unused; kept for interface compatibility.
    df : pandas.DataFrame or None
        Data to write; an empty frame is written when omitted.

    Returns
    -------
    bool
        True on success, False when saving failed.
    """
    # BUG FIX: the original called the nonexistent module-level
    # pd.to_excel(...) — to_excel is a DataFrame method — and then
    # returned True even after the except branch had run.
    try:
        frame = df if df is not None else pd.DataFrame()
        frame.to_excel(file_name, sheet_name=sheet)
    except Exception as e:
        # Original Korean message: "an error occurred while saving the Excel file".
        print("엑셀 파일을 저장하는 도중 오류가 발생했습니다.")
        print(e.args)
        return False
    return True
def put_data(s, a, r, _s):
    # Append one (state, action, reward, next_state) transition to the
    # module-level `data` frame, flushing to Excel periodically.
    # NOTE(review): `check_num`, `data` and `size` are module-level names
    # not visible here; assigning `check_num = 0` without a `global`
    # declaration raises UnboundLocalError at the comparison — confirm.
    # NOTE(review): the original source was collapsed onto one line, so
    # the statement grouping below is a best-effort reconstruction.
    if (check_num > 5):
        check_num = 0
        # NOTE(review): pandas has no module-level to_excel; this was
        # presumably meant to be data.to_excel(<path>).
        pd.to_excel(data)
    r = np.array(r)
    # NOTE(review): np.concatenate(..., axis=1) requires 2-D inputs;
    # 1-D s/r/_s would raise — TODO confirm the shapes at the call site.
    data.loc[size] = np.concatenate((s, a[0], r, _s), axis=1)
def create_spreadsheet_pr(precision, recall, file_name):
    """Write per-query precision/recall (padded to 100 ranks) to one xlsx.

    Parameters
    ----------
    precision, recall : list of list
        Per-query value lists, indexed like the module-level `relevance` dict.
    file_name : str
        Output file name without extension.
    """
    import pandas as pd

    rel_list = list(relevance.keys())
    dataframes = []
    for i in range(len(rel_list)):
        # BUG FIX: copy before padding so the caller's lists are not
        # mutated in place (the original appended to precision[i]/recall[i]).
        p = list(precision[i])
        r = list(recall[i])
        # Pad short result lists with 'nan' up to 100 ranks; full-length
        # lists skip both loops, matching the original's else branch.
        while len(p) < 100:
            p.append('nan')
        while len(r) < 100:
            r.append('nan')
        title = 'Query ' + str(rel_list[i])
        result = {title: list(range(1, 101)), 'Precision': p, 'Recall': r}
        # BUG FIX: the original used bare `DataFrame`, which was never
        # imported (NameError); qualify it with the pandas alias.
        df = pd.DataFrame(result, columns=[title, 'Recall', 'Precision'])
        dataframes.append(df)
    # BUG FIX: the original rebound the name `pd` to the concatenated
    # frame, shadowing the pandas module.
    combined = pd.concat(dataframes, axis=1)
    combined.to_excel(
        '/Users/fathimakhazana/Documents/IRFinalProject/Results/' +
        file_name + '.xlsx',
        index=None,
        header=True)  # Don't forget to add '.xlsx' at the end of the path
def main():
    # Fetch NASA's Astronomy Picture of the Day metadata and dump it.
    # NOTE(review): the API key is committed in source — move it to
    # configuration and rotate it.
    r = requests.get(
        "https://api.nasa.gov/planetary/apod?api_key=7cqHW801kCstlBsbcprRwwe01awpGk7XVVamgD0d"
    )
    # NOTE(review): pandas has no module-level to_excel, so this raises
    # AttributeError — presumably pandas.DataFrame([r.json()]) followed
    # by df.to_excel(...) was intended. Confirm before fixing.
    p = pandas.to_excel(r.json())
    print(r)
    print(p)
    #x = norm(r)
    # NOTE(review): pyexcel.to_excel is called here without the data
    # argument the pyexcel API expects — confirm intent.
    pyexcel.to_excel('~/mycode/nasa.xlsx')
def parse_ptag(self, response):
    # Scrapy callback: collect paragraph text from the page, run the
    # spider's word counter over it, and append the counts as a sheet
    # in the shared Excel writer.
    item = ProxItem()
    item['Link'] = response.url
    item['content'] = ''
    # Derive the sheet name from the final URL segment; Excel caps sheet
    # names at 31 chars, this truncates to 21 to stay safe.
    SheetName = str(str(response.url.split('/')[-1]).split('.')[0])[0:21]
    if item['Link'].split('/')[-1] == 'www.proxemate.com':
        SheetName = 'proxemate.com'
    #print(response.url)
    for text in response.xpath('/html/body/div[4]/p/text()').extract():
        item['content'] += text
    # NOTE(review): the local name `pd` shadows the conventional pandas
    # module alias — it actually holds the DataFrame returned by
    # GetWordCount2; consider renaming.
    pd = self.GetWordCount2(item['content'], item['Link'])
    #ContentFrame=pd.DataFrame({'Content':item['content']})
    #ContentFrame.to_excel(self.ExWriter,SheetName, index=False,startrow=0)
    pd.to_excel(self.ExWriter, SheetName, index=False, startrow=1)
    # NOTE(review): ExcelWriter.save() is deprecated in modern pandas;
    # prefer close() or a context manager.
    self.ExWriter.save()
    yield ({'Link': item['Link'], 'Content': item['content']})
# Build a small frame, then demo pandas I/O and summary helpers.
df = pd.DataFrame(data, columns=['Country', 'Capital', 'Population'])

# save & read files
pd.read_csv('file.csv', header=None, nrows=5)
pd.read_excel('file.xlsx')

# BUG FIX: the function is create_engine (original typo: creat_engine).
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
pd.read_sql('SELECT * FROM my_table;', engine)
pd.read_sql_table('my_table;', engine)
pd.read_sql_query('SELECT * FROM my_table;', engine)

xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')

# BUG FIX: to_csv/to_excel/to_sql are DataFrame methods, not module-level
# pandas functions.
df.to_csv('file.csv')
df.to_excel('file.xlsx', sheet_name='Sheet1')
df.to_sql('file', engine)

# frame features
df.shape
df.index
df.columns
df.info()
df.count()
df.sum()
df.cumsum()
df.min() / df.max()
# BUG FIX: the index-of-extreme methods are idxmin/idxmax (not idmin/idmax).
df.idxmin() / df.idxmax()
df.describe()
df.mean()
# Tag the text, then count which content words (VA/IC/NNG/NNP POS tags)
# appear within five tokens in front of `keyword`, and dump the counts
# to an Excel sheet. `nlp`, `keyword` and `tnqkr_list` come from the
# surrounding script and are not visible here.
text = nlp.pos(text)
tnqkr_list.extend(text)
print(text)
# get_loc = lambda str : [x.replace("[","").replace(":LOC","") for x in re.findall('\\[[\\w]+:LOC',str)]
front_list = []
for i, word in enumerate(tnqkr_list):
    if i != 0 and word[0] == keyword:
        # Look back up to five tokens for adjective/interjection/noun tags.
        for x in range(0, 5):
            if tnqkr_list[i - x][1] == 'VA' or tnqkr_list[
                    i - x][1] == 'IC' or tnqkr_list[
                        i - x][1] == 'NNG' or tnqkr_list[i - x][1] == 'NNP':
                front_list.append(tnqkr_list[i - x])
count = Counter(front_list)
# NOTE(review): this shadows the builtin `dict`; consider renaming.
dict = count
pd_list = []
for i in dict.keys():
    # Skip the keyword itself and single-character tokens.
    if i[0] != keyword and len(i[0]) > 1:
        pd_dic = {'keyword': i[0], 'morphs': i[1], 'count': dict[i]}
        pd_list.append(pd_dic)
print(pd_list)
# NOTE(review): rebinding `pd` to the DataFrame shadows the pandas module;
# the to_excel call below works only because `pd` is now a DataFrame.
pd = pd.DataFrame(pd_list, columns=('keyword', 'morphs', 'count'))
pd.to_excel('loc_count/front_' + keyword + '.xlsx',
            encoding='utf-8',
            index=True)
#
# from konlpy.tag import Mecab
# m = Mecab()
# m = m.pos('맛있는 수박')
# print(m)
# results = evaluator.perform_experiments(scenarios=50, policies=[policy0,policy1,policy2,policy3],
#                                         uncertainty_sampling=LHS)

# Run the dike model for every policy over the sampled scenarios.
n_scenarios = 500
n_policies = len(policies)
with MultiprocessingEvaluator(dike_model) as evaluator:
    results = evaluator.perform_experiments(scenarios=n_scenarios,
                                            policies=policies)

experiments, outcomes = results
policies2 = experiments['policy']
data = pd.DataFrame.from_dict(outcomes)
data['policy'] = policies2
sns.pairplot(data, hue='policy', vars=outcomes.keys(), )
plt.show()

#%%
# NOTE(review): dfresults is the raw (experiments, outcomes) tuple, not a
# DataFrame — confirm what was actually meant to be exported here.
dfresults = results

# this section saves the results to excel or tar if needed
to_excel = False
if to_excel == True:
    timestamp = time.strftime("%m.%d-%H%M%S")
    # BUG FIX: to_excel is a DataFrame method; pandas has no module-level
    # pd.to_excel(df, path) form.
    dfresults.to_excel(r'.\results{}.xlsx'.format(timestamp), index=False)

to_tar = True
if to_tar == True:
    timestamp = time.strftime("%m.%d-%H%M%S")
    dfresults.to_excel(r'.\results{}.xlsx'.format(timestamp), index=False)
    fn = 'results/{} scenarios {} policies_{}.tar.gz'.format(
        n_scenarios, n_policies, timestamp)
    save_results(results, fn)
# Writing a custom function to handle null-marker values
def convertpeople(cell):
    """Converter for the 'people' column: map the "n.a" marker to a default."""
    if cell == "n.a":
        return "sam walton"
    return cell


# Apply the custom function via the `converters` parameter.
dfex = pd.read_excel("C:\Manjunath\Personal\Manju\Pandas\excel.xlsx",
                     "Sheet1",
                     converters={"people": convertpeople})

# Export the dataframe to an Excel sheet without the index column.
# BUG FIX: to_excel is a DataFrame method — pandas has no module-level
# pd.to_excel; write the frame we just read.
dfex.to_excel("new.xlsx", sheet_name="stocks", index=False)
dfex.to_excel("new.xlsx",
              sheet_name="stocks",
              index=False,
              startrow=3,
              startcol=2)  # writing from a particular row and col

# Handling null values with fillna
# parse_dates converts the "day" column to datetime on read
dfweather = pd.read_csv(
    "C:\Manjunath\Personal\Manju\Pandas\csv\missing_data.csv",
    parse_dates=["day"])
dfweather.set_index("day", inplace=True)  # setting day column as index value
dffill = dfweather.fillna(0)  # filling null values with 0 in the dataframe
by='itemid').nunique().count() # Top 3 Preferred shops’ shopid that have the largest number of unique product shop_num = data[['shopid', 'itemid']].groupby(by='shopid').count().max() # Top 3 Categories that have the largest number of unique cross-border product product_num = data[[ 'category', 'cb_option', 'itemid' ]].query('cb_option==1').groupby(by='category').count().sort_values( by='itemid', ascending=False).head(3) # Top 3 shopid with the highest revenue data_temp = data[['shopid', 'itemid', 'price', 'sold_count']] data_temp['revenue'] = data_temp.price * data_temp.sold_count shop_top3_revenue_num = data_temp.groupby('shopid').sum().sort_values( by='revenue').head(3) # number of products that have more than 3 variations item_num = data[['itemid', 'item_variation']].groupby('itemid').nunique() # sign duplicated data['is_duplicated'] = data.duplicated(['item_name', 'item_description']) data['is_duplicated'] = 1 if data.is_dumplicated else 0 # Find duplicate listings that has less than 2 sold count data_out = data.query('is_duplicated == 1 and sold_count < 2') pd.to_excel('./duplicated_listings.xlsx', sheet=0) # Find the preferred shop shopid that have the most number of duplicated listing data.groupby(by='shopid').sum(by='duplicated').sort_values( by='duplicated').head()
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd
from numpy.random import randn
import matplotlib.pyplot as plt
import re
import datetime

# Create a 5x4 DataFrame of random numbers with labelled rows/columns.
df = pd.DataFrame(randn(5, 4), ['A', 'B', 'C', 'D', 'E'], ['W', 'X', 'Y', 'Z'])
# BUG FIX: np.NaN was removed in numpy 2.0; np.nan is the canonical spelling.
df.iloc[0, 1] = np.nan
df.iloc[2, 3] = np.nan

# BUG FIX: to_excel/to_csv are DataFrame methods (pandas has no
# module-level pd.to_excel), and read_csv('') always fails — round-trip
# the frame we just wrote instead.
df.to_excel('a.xlsx')
df.to_csv('a.csv')
pd.read_csv('a.csv')

df = pd.read_excel('pdata.xlsx')
df = df.set_index('Date')

data = np.arange(12).reshape(4, 3)
data[:2, 1:]
print(data * 3)
print(data + 100)

# columns
dft = df.columns[2:].tolist()

# BUG FIX: renamed `list` -> `nums`; the original shadowed the builtin.
nums = [1, 2, 10]
print(nums * 3)
arr = np.array(nums)
import pandas as pd
import numpy as np

# Five daily timestamps starting at the given datetime.
dates = pd.date_range('20190921111630', periods=5)

# BUG FIX: the original rebound the name `pd` to the DataFrame, shadowing
# the pandas module; use a distinct name for the frame.
df = pd.DataFrame(np.arange(25).reshape((5, 5)),
                  index=dates,
                  columns=['A', 'B', 'C', 'D', 'F'])
df.to_excel('C:/Users/M/Desktop/输出excel/output.xlsx')
print(df)
import pandas as pd  # data handling
import numpy as np  # numeric helpers
import matplotlib.pyplot as plt  # plotting

# pandas commands used so far
data = pd.read_excel('C:\\Users\\M\\Desktop\\python\\计算机学院研究生.xlsx')  # read the raw data
# BUG FIX: to_excel is a DataFrame method (and returns None) — write the
# frame out rather than calling the nonexistent pd.to_excel and
# clobbering `data` with its result.
data.to_excel('C:\\Users\\M\\Desktop\\output.xlsx')  # export data to Excel
# BUG FIX: corrected pd.DataFram -> pd.DataFrame and colums -> columns.
data = pd.DataFrame(columns=list('ABCD'), index=list('1234'))  # frame with columns ABCD, index 1234
a = data.at['3行', '2列']  # read a single cell by row/column labels
a = data.loc['3行', '2列']  # same as above
a = data.loc['缪奇峰']  # all data for one row label
a = data.iloc[1, 2]  # value at row 1, column 2 (positional)
a = data.sort_values(by='出生日期')  # sort by the birth-date column
a = data[data.出生日期 > 19950101]  # rows with birth date after 19950101
data[data.出生日期 > 19950101] = '都是我小弟'  # overwrite those rows' values
a = data.学号  # one column as a Series
a = data[10:45]  # rows 10..45 of data
a = data.生日.max()  # max of the birthday column (min() for smallest)
a = data.学号.mean()  # mean of the student-id column
a = data.shape  # (rows, columns) of the table
a = data.shape[0]  # row count ([1] for column count)

# numpy commands used so far
a = np.arange(33)  # array 0..32
a = np.nan  # NaN marker

# matplotlib commands used so far
data = pd.read_excel('C:\\Users\\M\\Desktop\\数学建模\\运动学片段\\3低速\\final.xlsx')
@author: 何友鑫 """ from WindPy import * import tushare as ts import numpy as np from Mystrategy import MyStrategy import pandas as pd from pandas import DataFrame if __name__ == "__main__": #取盈运能力、成长能力的数据 profit_data = ts.get_profit_data(2017, 3) growth_data = ts.get_growth_data(2017, 3) profit_data.to_excel("profit_data_2017_3.xlsx") growth_data.to_excel("growth_data_2017_3.xlsx") pd.to_excel(profit_data, ) s = MyStrategy(profit_data, growth_data) stock = s.getData() print(stock) w.start() MACD_data = w.wsd( "000001.SZ,000002.SZ,000004.SZ,000006.SZ,000005.SZ,000007.SZ", "MACD", "2017-10-27", "2017-11-25", "MACD_L=26;MACD_S=12;MACD_N=9;MACD_IO=3") MACD_data.Times #pd.Series(MACD_data.Times), MACD_df = DataFrame(pd.Series(MACD_data.Data)) ''' df=ts.get_hist_data(code='sh',start='2017-10-01',end='2017-11-23',ktype='D')
def convert_eps_cell(cell): if cell='not avialable': return None return cell df = pd.read_excel('stock_data.xlsx', 'Sheet1', converters={ 'people': convert_people_cell, 'eps': convert_eps_cell }) # ================================= # ### Writing to excel file ### # Use the to_excel method to write our dataframe to excel file pd.to_excel('new1.xlsx', sheet_name='stocks') # Same as csv file if you don't want to export header and index then put header and index parameter to False for respective effects pd.to_excel('new1.xlsx', sheet_name='stocks', index=False, header=False) # To start writing cretain rows and columns pd.to_excel('new1.xlsx', sheet_name='stocks', startrow=1, startcol=2) # ---------------------------------- # To write multiple dataframes in one single excel file # Creating two different dataframes df_stocks = pd.DataFrame({ 'tickers': ['GOOGL', 'WMT', 'MSFT'], 'price': [845, 65, 64 ], 'pe': [30.37, 14.26, 30.97],
# basic:
df = pd.read_excel("weather_data.xlsx", "Sheet1")


# Reading with Cell Convertor (conditional function) when importing (column specific)
def name_of_function(cell):
    """Converter: replace the "n.a." marker with a chosen value."""
    if cell == "n.a.":
        return 'value you want'
    return cell


df = pd.read_excel("folder/file.xlsx", "Sheet1",
                   converters={'column': name_of_function})

# Writing excel: (To offset:) startrow, startcol, (can also use the same
# CSV arguments like index=False etc.)
# BUG FIX: to_excel is a DataFrame method; pandas has no module-level
# pd.to_excel.
df.to_excel("folder/file.xlsx", sheet_name="sheet1")

# Writing multiple dataframes to separate sheets:
with pd.ExcelWriter('file.xlsx') as writer:
    df_first.to_excel(writer, sheet_name="sheet1")
    df_second.to_excel(writer, sheet_name="sheet2")

####################################################################
# WHERE CLAUSE Filters
# Documentation: "Pandas Series operations"
import pandas as pd


def make_file_with_sensei_name(sensei_name):
    """Return the rows of output_df belonging to the given teacher.

    (Original docstring, JP: "先生の名前を入れると、先生のpandasが生成されます。" —
    "given a teacher's name, produce that teacher's DataFrame".)
    NOTE(review): the original body was a `return None` stub; this filters
    by the supervisor column as the docstring describes — confirm intent.
    """
    return output_df[input_df["指導教員"] == sensei_name]


# BUG FIX: the original list literal ended with a bare `.....`
# placeholder, which is a syntax error; extend this list as needed.
necessary_columns = ["学籍番号", "氏名"]

input_df = pd.read_excel("../機密ファイル.xlsx")
output_df = input_df.loc[:, necessary_columns]
output_df["評価"] = ""
output_df["詳細報告_コメント"] = ""

# One output workbook per supervising teacher.
sensei_name_list = set(input_df["指導教員"])
for sensei_name in sensei_name_list:
    sensei_gotono_df = make_file_with_sensei_name(sensei_name)
    # BUG FIX: to_excel is a DataFrame method (pandas has no module-level
    # pd.to_excel); also give the output file an .xlsx extension.
    sensei_gotono_df.to_excel("相談週間_" + sensei_name + ".xlsx")
def query_all_dumps2excel(self,
                          sql: str,
                          params: Optional[Sequence] = None,
                          dumped_excel: str = "dumped.xlsx") -> None:
    """Run *sql* with *params* and dump the full result set to an xlsx file.

    Parameters
    ----------
    sql : str
        Query to execute.
    params : Optional[Sequence]
        Bind parameters for the query, if any.
    dumped_excel : str
        Target path for the workbook (default "dumped.xlsx").
    """
    # FIX: the local was named `pd`, shadowing the conventional pandas
    # module alias; it actually holds the DataFrame returned by the query.
    df = self.query_all_return_pandas(sql, params)
    df.to_excel(dumped_excel)
print(i, j ) from itertools import product sum(max(x, y) for x, y in product(range(1, 7), range(1, 7))) / 36 #### Load/Save df = pd.read_csv("file.csv") pd.to_csv("file.csv", index=False) df = pd.read_stata("file.dta") labels = pd.io.stata.StataReader(r"C:\Users\XXX.dta").variable_labels() df.to_stata(r"C:\Users\XXX.dta", variable_labels = labels) df = pd.read_excel('file.xlsx', index_col=None, header=0) pd.to_excel("file.xlsx") df = pd.read_pickle("file.pkl") pd.to_pickle("file.pkl") def timestamp(): time = f"{datetime.datetime.now().date()}--{datetime.datetime.now().time().replace(microsecond=0)}" return time.replace(":", "-") with open("all_players.txt", "w") as f: for player in all_players: f.write(player +"\n") all_teams = [] with open("all_teams.txt", "r") as f: for line in f:
# Keep only rows created on or before 2019-06-07.
df = df1[df1['Created time'] <= '2019-06-07 00:00:00']

# In[9]:
# Printing The Default Data
df[['Group', 'Status']].head(5)

# In[4]:
df.head()

# In[7]:
# Grouping Data according to Group And Status
g1 = df1.groupby(['Group', 'Status']).agg({'Status': 'count'})

# In[10]:
# Printing Grouped Data
g1.head(8)

# In[110]:
# Converting Multilevel Index to DataFrame Using the Unstack Function
g1.unstack()

# In[ ]:
# Saving It In An Excel
# BUG FIX: to_excel is a DataFrame method and needs a real path; the
# original called the nonexistent pd.to_excel('') with an empty filename.
g1.to_excel('grouped_status.xlsx')
print("display all col names")
print(df.columns)
print(df.sort_values('col2'))
print(df.isnull())
print("####################################################################")

data = {
    'A': ['f', 'f', 'f', 'b', 'b', 'b'],
    'B': ['one', 'one', 'two', 'two', 'two', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1]
}
df = pd.DataFrame(data)
print(df)
print("Create a pivot table")
print(df.pivot_table(values='D', index=['A', 'B']))

# Part 8
# Can read excel sheet
pd.read_excel("sample.xlsx")
# Can put df into excel
# BUG FIX: to_excel is a DataFrame method, not a module-level pandas
# function.
df.to_excel("OutPut.xlsx")
# Read HTML
pd.read_html("sample.html")

# can put into sql tables
from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")
# BUG FIX: `data` is a plain dict here, so data[0] raises KeyError —
# write the DataFrame built from it instead.
df.to_sql("my_table", engine)
sqldf = pd.read_sql("my_table", engine)
sqldf.head(5)
# File IO with Microsoft Excel
#--------------------------------------------------

# Pandas has more advanced tricks for file I/O (that is, Input/Output).
# Here are some samples of code to use when the corresponding
# files exist in your working directory.

# One of the most common ways to work with a dataset is to read and write
# to a csv file.
# BUG FIX: to_csv/to_excel are DataFrame methods (pandas has no module-
# level pd.to_csv / pd.to_excel), so keep the frame returned by read_csv.
df = pd.read_csv('file.csv', header=None, nrows=5)
df.to_csv('myDataFrame.csv')

# You can also read from Excel files, by selecting the worksheet.
xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')

# You can also write to an Excel worksheet.
pd.read_excel('file.xlsx')
df.to_excel('dir/myDataFrame.xlsx', sheet_name='Sheet1')

# For examples of the use of the pandas package to prepare data
# for linear regression and estimate the model, see the sample files
# reg_with_stats_models.py and reg_with_sklearn.py above.

##################################################
# End.
##################################################
def stock_data(name_list):
    """Fetch the tushare stock-basics table and save it to stock_base.xlsx.

    Parameters
    ----------
    name_list : list
        Currently unused; kept for interface compatibility.
    """
    stock_base = ts.get_stock_basics()
    # BUG FIX: to_excel is a DataFrame method; the original called the
    # nonexistent module-level pd.to_excel and never wrote stock_base.
    stock_base.to_excel('stock_base.xlsx')
temp_id.append(uuid.uuid1()) # temp_id[0]先赋值 for i in range(1, m): if session_id[i] == session_id[i - 1]: temp_id.append(temp_id[i - 1]) # 给temp_id[i]赋值,i从1开始 else: temp_id.append(uuid_uuid1()) for x,y in zip(temp_id,query_text): url_all ='' r = requests.post(url_all,data=json.dumps(data3)) if r.status_code == 200: data.append(r) else if r.status_code in (500,501,502....) print("服务器故障,睡眠半小时后重新发送") sleep(30) r= requests.post(...) #此处或者做成循环,每次睡醒就发送请求,如果还是500系列,继续循环,如果是其他号码,就跳出循环 else: print("报错,错误码: %s" % r.status_code) data.append('请求错误') path = r'C:\Users\Administrator\Desktop\' filename = "result"+str(count) data_result = pd.to_excel(path+filename) count=count+1