Esempio n. 1
0
def make_excel(file_name, sheet, desc = ""):
    """Write the module-level `pd` object to an Excel file.

    NOTE(review): `pd` must be bound to a DataFrame somewhere outside this
    view -- the pandas module itself has no `to_excel`. Confirm the binding.

    Args:
        file_name: target .xlsx path.
        sheet: worksheet name.
        desc: unused; kept for interface compatibility.

    Returns:
        True on success, False when the write failed (previously the
        function returned True unconditionally, hiding failures).
    """
    try:
        pd.to_excel(file_name, sheet_name = sheet)
    except Exception as e:
        # Keep the original Korean user-facing message byte-for-byte.
        print("엑셀 파일을 저장하는 도중 오류가 발생했습니다.")
        print(e.args)
        return False  # bug fix: failures used to report success
    return True
Esempio n. 2
0
def put_data(s, a, r, _s):
    """Append one (state, action, reward, next-state) transition to the
    module-level `data` frame, flushing to Excel every few rows.

    NOTE(review): `check_num` is both read and assigned here, making it a
    local -- the read raises UnboundLocalError unless a `global` declaration
    exists outside this view. `pd.to_excel(data)` is also not a pandas API
    (`to_excel` is a DataFrame method), and `data`/`size` are presumably
    module-level state -- confirm all three against the rest of the file.
    """
    if (check_num > 5):
        check_num = 0
        pd.to_excel(data)

    # Stack the transition into one row; assumes s, a[0], r, _s are
    # 2-D arrays compatible with axis=1 concatenation -- TODO confirm.
    r = np.array(r)
    data.loc[size] = np.concatenate((s, a[0], r, _s), axis=1)
Esempio n. 3
0
def create_spreadsheet_pr(precision, recall, file_name):
    """Build one side-by-side spreadsheet of precision/recall per query.

    Each query contributes three columns (rank 1-100, Precision, Recall);
    lists shorter than 100 entries are padded with the string 'nan'.

    Args:
        precision: per-query lists of precision values, indexed in the same
            order as the module-level `relevance` mapping's keys.
        recall: per-query lists of recall values.
        file_name: output file name without extension.
    """
    import pandas as pd
    rel_list = list(relevance.keys())
    dataframes = []
    for i in range(len(rel_list)):
        title = 'Query ' + str(rel_list[i])
        # Copy before padding so the caller's lists are not mutated
        # (the original appended 'nan' directly into precision[i]/recall[i]).
        p = list(precision[i])
        r = list(recall[i])
        while len(p) < 100:
            p.append('nan')
        while len(r) < 100:
            r.append('nan')
        result = {title: list(range(1, 101)), 'Precision': p, 'Recall': r}
        # Bug fix: use pd.DataFrame -- the bare name DataFrame is not
        # imported in this scope.
        df = pd.DataFrame(result, columns=[title, 'Recall', 'Precision'])
        dataframes.append(df)
    # Bug fix: do not rebind the `pd` module name to the result frame.
    combined = pd.concat(dataframes, axis=1)
    combined.to_excel(
        '/Users/fathimakhazana/Documents/IRFinalProject/Results/' + file_name +
        '.xlsx',
        index=None,
        header=True)  #Don't forget to add '.xlsx' at the end of the path
Esempio n. 4
0
def main():
    """Fetch NASA's Astronomy Picture of the Day and save it to Excel."""
    r = requests.get(
        "https://api.nasa.gov/planetary/apod?api_key=7cqHW801kCstlBsbcprRwwe01awpGk7XVVamgD0d"
    )
    # Bug fix: the pandas module has no `to_excel`; flatten the JSON payload
    # into a one-row DataFrame instead.
    p = pandas.json_normalize(r.json())
    print(r)
    print(p)
    #x = norm(r)
    # Bug fix: `to_excel` is a DataFrame method; the old
    # pyexcel.to_excel('~/mycode/nasa.xlsx') call passed only a path, no data.
    p.to_excel('~/mycode/nasa.xlsx')
Esempio n. 5
0
    def parse_ptag(self, response):
        """Scrapy parse callback: collect the page's paragraph text,
        compute word counts, and append one sheet to the shared Excel writer.

        Yields a dict with the page link and its concatenated content.
        """
        item = ProxItem()
        item['Link'] = response.url
        item['content'] = ''
        # Sheet name: last URL segment without extension, capped at 21 chars.
        SheetName = str(str(response.url.split('/')[-1]).split('.')[0])[0:21]
        if item['Link'].split('/')[-1] == 'www.proxemate.com':
            SheetName = 'proxemate.com'

        #print(response.url)
        for text in response.xpath('/html/body/div[4]/p/text()').extract():

            item['content'] += text
        # NOTE(review): this local `pd` shadows the pandas module; it holds
        # whatever GetWordCount2 returns (written to Excel below) -- consider
        # renaming to avoid confusion with pandas.
        pd = self.GetWordCount2(item['content'], item['Link'])
        #ContentFrame=pd.DataFrame({'Content':item['content']})
        #ContentFrame.to_excel(self.ExWriter,SheetName, index=False,startrow=0)
        pd.to_excel(self.ExWriter, SheetName, index=False, startrow=1)
        self.ExWriter.save()
        yield ({'Link': item['Link'], 'Content': item['content']})
Esempio n. 6
0
# NOTE(review): cheat-sheet snippet; `data` must be defined earlier in the file.
df = pd.DataFrame(data, columns=['Country', 'Capital', 'Population'])

# save & read files
pd.read_csv('file.csv', header=None, nrows=5)
pd.read_excel('file.xlsx')
# Bug fix: the SQLAlchemy function is spelled create_engine.
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
pd.read_sql('SELECT * FROM my_table;', engine)
pd.read_sql_table('my_table;', engine)
pd.read_sql_query('SELECT * FROM my_table;', engine)

xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')

# Bug fix: to_csv/to_excel/to_sql are DataFrame methods, not module functions.
df.to_csv('file.csv')
df.to_excel('file.xlsx', sheet_name='Sheet1')
df.to_sql('file', engine)

# frame feature
df.shape
df.index
df.columns
df.info()
df.count()

df.sum()
df.cumsum()
df.min() / df.max()
# Bug fix: pandas spells these idxmin/idxmax.
df.idxmin() / df.idxmax()
df.describe()
df.mean()
Esempio n. 7
0
    text = nlp.pos(text)
    tnqkr_list.extend(text)
print(text)
# get_loc = lambda str : [x.replace("[","").replace(":LOC","") for x in re.findall('\\[[\\w]+:LOC',str)]
front_list = []
for i, word in enumerate(tnqkr_list):
    if i != 0 and word[0] == keyword:
        # Look back up to four tokens for content words around the keyword.
        # NOTE(review): for i < 4, tnqkr_list[i - x] wraps to the list's
        # tail via negative indexing -- confirm that is intended.
        for x in range(0, 5):
            # Membership test replaces the original chained == comparisons.
            if tnqkr_list[i - x][1] in ('VA', 'IC', 'NNG', 'NNP'):
                front_list.append(tnqkr_list[i - x])

# Bug fix: do not shadow the built-in name `dict`.
counts = Counter(front_list)
pd_list = []
for token in counts.keys():
    if token[0] != keyword and len(token[0]) > 1:
        pd_list.append({'keyword': token[0],
                        'morphs': token[1],
                        'count': counts[token]})

print(pd_list)
# Bug fix: keep the pandas module name intact; the original rebound `pd`
# to the result frame.
result_df = pd.DataFrame(pd_list, columns=('keyword', 'morphs', 'count'))
result_df.to_excel('loc_count/front_' + keyword + '.xlsx',
                   encoding='utf-8',
                   index=True)
#
# from konlpy.tag import Mecab
# m = Mecab()
# m = m.pos('맛있는 수박')
# print(m)
Esempio n. 8
0
    #     results = evaluator.perform_experiments(scenarios=50, policies=[policy0,policy1,policy2,policy3],
    #                                             uncertainty_sampling=LHS)
    # Run the dike model for every (scenario, policy) combination in parallel.
    n_scenarios = 500
    n_policies = len(policies)
    with MultiprocessingEvaluator(dike_model) as evaluator:
        results = evaluator.perform_experiments(scenarios = n_scenarios,
                                                policies = policies)

    # results is an (experiments, outcomes) pair.
    experiments, outcomes = results
    policies2 = experiments['policy']
    data = pd.DataFrame.from_dict(outcomes)
    data['policy'] = policies2

    sns.pairplot(data, hue='policy',  vars=outcomes.keys(), )
    plt.show()
#%%

    dfresults = results
    #this section saves the results to excel or tar if needed
    to_excel = False
    if to_excel == True:
        timestamp = time.strftime("%m.%d-%H%M%S")
        # NOTE(review): dead branch (to_excel is False). If enabled it would
        # crash -- the pandas module has no to_excel, and `dfresults` is the
        # raw results tuple, not a DataFrame.
        pd.to_excel(dfresults, r'.\results{}.xlsx'.format(timestamp), index = False)

    to_tar = True
    if to_tar == True:
        timestamp = time.strftime("%m.%d-%H%M%S")
        # NOTE(review): dfresults is a tuple; tuples have no to_excel either.
        # Presumably the `data` DataFrame above was meant -- confirm.
        dfresults.to_excel(r'.\results{}.xlsx'.format(timestamp), index = False)
        fn = 'results/{} scenarios {} policies_{}.tar.gz'.format(n_scenarios, n_policies, timestamp)
        save_results(results, fn)
Esempio n. 9
0

# Writing customize function to handle null values
def convertpeople(cell):
    """Read-time converter: map the sheet's "n.a" placeholder in the
    "people" column to the default owner name, pass other values through."""
    return "sam walton" if cell == "n.a" else cell


# call customize function by parameter converters
dfex = pd.read_excel("C:\Manjunath\Personal\Manju\Pandas\excel.xlsx",
                     "Sheet1",
                     converters={"people": convertpeople})

# exporting dataframe to excel sheet by making index false
# Bug fix: to_excel is a DataFrame method, not a pandas module function.
dfex.to_excel("new.xlsx", sheet_name="stocks", index=False)

dfex.to_excel("new.xlsx",
              sheet_name="stocks",
              index=False,
              startrow=3,
              startcol=2)  # writing from particular row and col

# Handling null values by fillna method
# parse_dates will convert the "day" column to datetime on read
dfweather = pd.read_csv(
    "C:\Manjunath\Personal\Manju\Pandas\csv\missing_data.csv",
    parse_dates=["day"])

dfweather.set_index("day", inplace=True)  # setting day column as index value
dffill = dfweather.fillna(0)  # filling null values to 0 in dataframe
Esempio n. 10
0
        by='itemid').nunique().count()

    # Top 3 Preferred shops’ shopid that have the largest number of unique product
    shop_num = data[['shopid', 'itemid']].groupby(by='shopid').count().max()
    # Top 3 Categories that have the largest number of unique cross-border product
    product_num = data[[
        'category', 'cb_option', 'itemid'
    ]].query('cb_option==1').groupby(by='category').count().sort_values(
        by='itemid', ascending=False).head(3)

    # Top 3 shopid with the highest revenue
    data_temp = data[['shopid', 'itemid', 'price', 'sold_count']]
    # NOTE(review): assigning into a column-sliced view of `data` can trigger
    # pandas' SettingWithCopyWarning; consider .copy() on the line above.
    data_temp['revenue'] = data_temp.price * data_temp.sold_count
    shop_top3_revenue_num = data_temp.groupby('shopid').sum().sort_values(
        by='revenue').head(3)

    # number of products that have more than 3 variations
    item_num = data[['itemid', 'item_variation']].groupby('itemid').nunique()

    # sign duplicated
    data['is_duplicated'] = data.duplicated(['item_name', 'item_description'])
    # BUG(review): `is_dumplicated` is a typo (AttributeError at runtime),
    # and the ternary evaluates a whole Series as one bool; the intent looks
    # like data['is_duplicated'].astype(int) -- confirm before fixing.
    data['is_duplicated'] = 1 if data.is_dumplicated else 0

    # Find duplicate listings that has less than 2 sold count
    data_out = data.query('is_duplicated == 1 and sold_count < 2')
    # NOTE(review): the pandas module has no to_excel; presumably
    # data_out.to_excel(..., sheet_name=...) was intended.
    pd.to_excel('./duplicated_listings.xlsx', sheet=0)

    # Find the preferred shop shopid that have the most number of duplicated listing
    data.groupby(by='shopid').sum(by='duplicated').sort_values(
        by='duplicated').head()
Esempio n. 11
0
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd
from numpy.random import randn
import matplotlib.pyplot as plt
import re
import datetime

## Create a DataFrame: random values, row index A-E, column labels W-Z
df = pd.DataFrame(randn(5, 4), ['A', 'B', 'C', 'D', 'E'], ['W', 'X', 'Y', 'Z'])
df.iloc[0, 1] = np.NaN
df.iloc[2, 3] = np.NaN

# Bug fix: to_excel/to_csv are DataFrame methods, not pandas module functions.
df.to_excel('a.xlsx')
df.to_csv('a.csv')
# Bug fix: pd.read_csv('') raised immediately; read back the file written above.
pd.read_csv('a.csv')
df = pd.read_excel('pdata.xlsx')
df = df.set_index('Date')

data = np.arange(12).reshape(4, 3)
data[:2, 1:]
print(data * 3)
print(data + 100)

# columns
dft = df.columns[2:].tolist()

# Bug fix: do not shadow the built-in `list`.
nums = [1, 2, 10]
print(nums * 3)
arr = np.array(nums)
Esempio n. 12
0
import pandas as pd
import numpy as np

dates = pd.date_range('20190921111630', periods=5)
# Bug fix: keep the pandas module name intact; the original rebound `pd`
# to the DataFrame, shadowing the module for the rest of the file.
df = pd.DataFrame(np.arange(25).reshape((5, 5)),
                  index=dates,
                  columns=['A', 'B', 'C', 'D', 'F'])

df.to_excel('C:/Users/M/Desktop/输出excel/output.xlsx')

print(df)
Esempio n. 13
0
import pandas as pd                 # data handling
import numpy as np                  # numerics
import matplotlib.pyplot as plt     # plotting

# pandas commands used so far
data = pd.read_excel('C:\\Users\\M\\Desktop\\python\\计算机学院研究生.xlsx')      # read the raw data
# Bug fix: to_excel is a DataFrame method and returns None -- do not rebind data.
data.to_excel('C:\\Users\\M\\Desktop\\output.xlsx')       # export data to excel
# Bug fix: spelled DataFrame / columns correctly.
data = pd.DataFrame(columns=list('ABCD'), index=list('1234'))    # DataFrame with columns ABCD, index 1234

a = data.at['3行','2列']      # access a single cell of data
a = data.loc['3行','2列']     # same effect as above
a = data.loc['缪奇峰']        # access every value in one row by label
a = data.iloc[1,2]            # access the value at row 1, column 2

a = data.sort_values(by='出生日期')             # sort by the birth-date column
a = data[data.出生日期 > 19950101]              # rows with birth date after 19950101
data[data.出生日期 > 19950101] ='都是我小弟'     # overwrite those rows' values

a = data.学号       # one column of data, array-like
a = data[10:45]     # rows 10..45 of data
a = data.生日.max() # max of the birthday column; min() for the smallest
a = data.学号.mean()# mean of the student-id column
a = data.shape      # table shape: (rows, columns)
a = data.shape[0]   # row count; [1] gives the column count

# numpy commands used so far
a = np.arange(33)  # array of 0..32
a = np.nan          # NaN value

# matplotlib commands used so far
data = pd.read_excel('C:\\Users\\M\\Desktop\\数学建模\\运动学片段\\3低速\\final.xlsx')
Esempio n. 14
0
@author: 何友鑫
"""
from WindPy import *
import tushare as ts
import numpy as np
from Mystrategy import MyStrategy
import pandas as pd
from pandas import DataFrame
if __name__ == "__main__":
    # Pull profitability and growth fundamentals for 2017 Q3.
    profit_data = ts.get_profit_data(2017, 3)
    growth_data = ts.get_growth_data(2017, 3)
    profit_data.to_excel("profit_data_2017_3.xlsx")
    growth_data.to_excel("growth_data_2017_3.xlsx")
    # Bug fix: removed `pd.to_excel(profit_data, )` -- the pandas module has
    # no to_excel, and the frame was already written two lines above.
    s = MyStrategy(profit_data, growth_data)
    stock = s.getData()
    print(stock)

    w.start()
    MACD_data = w.wsd(
        "000001.SZ,000002.SZ,000004.SZ,000006.SZ,000005.SZ,000007.SZ", "MACD",
        "2017-10-27", "2017-11-25", "MACD_L=26;MACD_S=12;MACD_N=9;MACD_IO=3")
    MACD_data.Times
    #pd.Series(MACD_data.Times),
    MACD_df = DataFrame(pd.Series(MACD_data.Data))
'''


df=ts.get_hist_data(code='sh',start='2017-10-01',end='2017-11-23',ktype='D')
Esempio n. 15
0
def convert_eps_cell(cell):
  """Read-time converter for the 'eps' column: map the sheet's
  'not avialable' placeholder (sic -- matches the data) to None,
  pass every other value through."""
  # Bug fix: the original used assignment (=) in the condition -- SyntaxError.
  if cell == 'not avialable':
    return None
  return cell

df = pd.read_excel('stock_data.xlsx', 'Sheet1', converters={
  'people': convert_people_cell,
  'eps': convert_eps_cell
})
# =================================
# ### Writing to excel file ###


# Use the to_excel method to write our dataframe to excel file
# Bug fix: to_excel is a DataFrame method, not a pandas module function.
df.to_excel('new1.xlsx', sheet_name='stocks')

# Same as csv file if you don't want to export header and index then put header and index parameter to False for respective effects
df.to_excel('new1.xlsx', sheet_name='stocks', index=False, header=False)

# To start writing at certain rows and columns
df.to_excel('new1.xlsx', sheet_name='stocks', startrow=1, startcol=2)
# ----------------------------------
# To write multiple dataframes in one single excel file

# Creating two different dataframes

df_stocks = pd.DataFrame({
    'tickers': ['GOOGL', 'WMT', 'MSFT'],
    'price': [845, 65, 64 ],
    'pe': [30.37, 14.26, 30.97],
Esempio n. 16
0
#basic:
# Read worksheet "Sheet1" of the workbook into a DataFrame.
df = pd.read_excel("weather_data.xlsx","Sheet1")

# Reading with Cell Convertor (conditional function) when importing (column specific)
def name_of_function(cell):
    """Cell converter: swap the sheet's "n.a." placeholder for a chosen
    replacement value; all other cells pass through unchanged."""
    return 'value you want' if cell == "n.a." else cell

df = pd.read_excel("folder/file.xlsx","Sheet1", converters = {
    'column': name_of_function
})

# Writing excel: (To offset:) startrow, startcol, (can also use same CSV arguments like index=False etc.)
# Bug fix: to_excel is a DataFrame method, not a pandas module function.
df.to_excel("folder/file.xlsx",sheet_name="sheet1")

# Writing multiple dataframes to seperate sheets:
with pd.ExcelWriter('file.xlsx') as writer:
    df_first.to_excel(writer, sheet_name="sheet1")
    df_second.to_excel(writer, sheet_name="sheet2")







####################################################################
# WHERE CLAUSE Filters
# Documentation: "Pandas Series operations"
Esempio n. 17
0
import pandas as pd

def make_file_with_sensei_name(sensei_name):
	"""Given a teacher's name, build that teacher's DataFrame.

	NOTE(review): still a stub -- always returns None; callers must
	handle that until it is implemented.
	"""
	return None

# Bug fix: the original list literal contained a bare `.....` placeholder,
# which is a SyntaxError.
necessary_columns = ["学籍番号", "氏名"]  # TODO: add the remaining columns
input_df = pd.read_excel("../機密ファイル.xlsx")
output_df = input_df.loc[:, necessary_columns]
output_df["評価"] = ""
output_df["詳細報告_コメント"] = ""

sensei_name_list = set(input_df["指導教員"])
for sensei_name in sensei_name_list:
	sensei_gotono_df = make_file_with_sensei_name(sensei_name)
	# Bug fix: to_excel is a DataFrame method, not a pandas module function;
	# also guard against the stub above returning None.
	if sensei_gotono_df is not None:
		sensei_gotono_df.to_excel("相談週間_" + sensei_name)
Esempio n. 18
0
 def query_all_dumps2excel(self,
                           sql: str,
                           params: Optional[Sequence] = None,
                           dumped_excel: str = "dumped.xlsx") -> None:
     """Run `sql` with `params`, fetch the full result set as a DataFrame,
     and write it to `dumped_excel`.
     """
     # Renamed the local from `pd` so it no longer shadows the pandas module.
     df = self.query_all_return_pandas(sql, params)
     df.to_excel(dumped_excel)
Esempio n. 19
0
  print(i, j )

from itertools import product
# Expected value of the max of two fair dice.
sum(max(x, y) for x, y in product(range(1, 7), range(1, 7))) / 36

#### Load/Save

df = pd.read_csv("file.csv")
# Bug fix: to_csv/to_excel are DataFrame methods, and pandas.to_pickle
# requires the object as its first argument -- use the method forms.
df.to_csv("file.csv", index=False)

df = pd.read_stata("file.dta")
labels = pd.io.stata.StataReader(r"C:\Users\XXX.dta").variable_labels()
df.to_stata(r"C:\Users\XXX.dta", variable_labels = labels)

df = pd.read_excel('file.xlsx', index_col=None, header=0)
df.to_excel("file.xlsx")

df = pd.read_pickle("file.pkl")
df.to_pickle("file.pkl")
             
def timestamp():
    """Return the current date and time as a filename-safe string:
    'YYYY-MM-DD--HH-MM-SS' (colons replaced, microseconds dropped)."""
    date_part = datetime.datetime.now().date()
    time_part = datetime.datetime.now().time().replace(microsecond=0)
    return f"{date_part}--{time_part}".replace(":", "-")
             
with open("all_players.txt", "w") as f:
    for player in all_players:
        f.write(player + "\n")

all_teams = []
with open("all_teams.txt", "r") as f:
    for line in f:
        # Bug fix: the loop had no body (IndentationError); collect the
        # lines. Presumably trailing newlines should be dropped -- confirm.
        all_teams.append(line.strip())
# NOTE(review): this statement had been dedented into the loop's position;
# it depends on a df1 defined elsewhere in the file.
df = df1[df1['Created time'] <= '2019-06-07 00:00:00']

# In[9]:

# Printing The Default Data
df[['Group', 'Status']].head(5)

# In[4]:

df.head()

# In[7]:

# Grouping Data according to Group And Status
g1 = df1.groupby(['Group', 'Status']).agg({'Status': 'count'})

# In[10]:

# Printing Grouped Data
g1.head(8)

# In[110]:

# Converting Multilevel Index to DataFrame Using Unstack
g1.unstack()

# In[ ]:

# Saving It In An Excel
# Bug fix: to_excel is a DataFrame method and needs a real path -- the
# original called pd.to_excel('') on the module with an empty path.
g1.to_excel('grouped_status.xlsx')
Esempio n. 21
0
print("display all col names")
print(df.columns)
print(df.sort_values('col2'))
print(df.isnull())

print("####################################################################")
data = {
    'A': ['f', 'f', 'f', 'b', 'b', 'b'],
    'B': ['one', 'one', 'two', 'two', 'two', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1]
}
df = pd.DataFrame(data)
print(df)
print("Create a pivot table")
print(df.pivot_table(values='D', index=['A', 'B']))

# Part 8
# Can read excel sheet
pd.read_excel("sample.xlsx")

#Can put df into excel
# Bug fix: to_excel is a DataFrame method, not a pandas module function.
df.to_excel("OutPut.xlsx")
#Read HTML
pd.read_html("sample.html")
# can put into sql tables
from sqlalchemy import create_engine
engine = create_engine("sqlite:///:memory:")
# Bug fix: `data` is a plain dict here (data[0] would be a KeyError);
# write the DataFrame built from it instead.
df.to_sql("my_table", engine)
sqldf = pd.read_sql("my_table", engine)
sqldf.head(5)
Esempio n. 22
0
# File IO with Microsoft Excel
#--------------------------------------------------

# Pandas has more advanced tricks for file I/O (that is, Input/Output).

# Here are some samples of code to use when the corresponding
# files exist in your working directory.

# One of the most common way to work with a dataset is to read and write to a csv file.

# Bug fix: to_csv/to_excel are DataFrame methods, not pandas module
# functions; keep the frame that read_csv returns so it can be written.
df = pd.read_csv('file.csv', header=None, nrows=5)
df.to_csv('myDataFrame.csv')

# You can also read from Excel files, by selecting the worksheet.

xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')

# You can also write to an Excel worksheet.

pd.read_excel('file.xlsx')
df.to_excel('dir/myDataFrame.xlsx', sheet_name='Sheet1')

# For examples of the use of the pandas package to prepare data
# for linear regression and estimate the model, see the sample files
# reg_with_stats_models.py and reg_with_sklearn.py above.

##################################################
# End.
##################################################
Esempio n. 23
0
def stock_data(name_list):
    """Download the tushare stock-basics table and save it to Excel.

    NOTE(review): `name_list` is currently unused -- confirm whether the
    intent was to filter the table by these names before saving.
    """
    stock_base = ts.get_stock_basics()
    # Bug fix: to_excel is a DataFrame method; the pandas module has none.
    stock_base.to_excel('stock_base.xlsx')
Esempio n. 24
0
    temp_id.append(uuid.uuid1()) # temp_id[0]先赋值
    for i in range(1, m):
        if session_id[i] == session_id[i - 1]:
            temp_id.append(temp_id[i - 1])   # 给temp_id[i]赋值,i从1开始
        else:
            temp_id.append(uuid_uuid1())

    for x,y in zip(temp_id,query_text):
        url_all =''
        r = requests.post(url_all,data=json.dumps(data3))
        if r.status_code == 200:
            data.append(r)
        else if r.status_code in (500,501,502....)
             print("服务器故障,睡眠半小时后重新发送")
             sleep(30)
             r= requests.post(...)  #此处或者做成循环,每次睡醒就发送请求,如果还是500系列,继续循环,如果是其他号码,就跳出循环
        else:
            print("报错,错误码: %s" % r.status_code)
            data.append('请求错误')

    path = r'C:\Users\Administrator\Desktop\'
    filename = "result"+str(count)
    data_result = pd.to_excel(path+filename)
    count=count+1