# Price analysis of the product frame `df` (loaded earlier in the script).

# Sort by ascending price and number the rows 1..N in that order.
dfSorted = df.sort_values(['price'])
dfSorted['rank'] = np.arange(1, len(dfSorted) + 1)

# price top5: the first five rows of the ascending sort (lowest prices)
dfPriceTop5 = dfSorted.head(5)

# price reverse top 5: the first five rows of the descending sort
dfPriceReverseTop5 = df.sort_values(['price'], ascending=False).head(5)

# Compute each statistic once so the printed and saved values agree.
priceMean = df['price'].mean()
priceStd = df['price'].std()
priceVar = df['price'].var()
print("av:", priceMean)
print("std:", priceStd)
print("var:", priceVar)

# The previous {'col1': <scalar>, 'col2': []} input produced a frame with
# zero rows, so the statistics were never written out. Wrapping each value
# in a one-element list yields a single summary row instead.
dfResult = pd.DataFrame({
    'mean': [priceMean],
    'std': [priceStd],
    'var': [priceVar],
})

# Save into a single Excel file; each entry pairs a frame with a sheetName.
dfs = [
    {"df": dfSorted, "sheetName": "sorted"},
    {"df": dfPriceTop5, "sheetName": "priceTop5"},
    {"df": dfPriceReverseTop5, "sheetName": "priceReverseTop5"},
    {"df": dfResult, "sheetName": "dfResult"},
]
save(dfs, "./productAnalyze.xlsx")
import pandas as pd

from libs.excelSaver import save

# Do not truncate long cell values when printing. None is the supported
# "no limit" value; the old -1 sentinel was deprecated in pandas 1.0 and is
# no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)

df = pd.read_json("./195297.json")
print(df.count())

# Keep the ten highest-priced rows and write them to an Excel file.
dfSorted = df.sort_values(['price'], ascending=False)
dfTop10 = dfSorted.head(10)
save(dfTop10, 'priceTop10.xlsx')
import pandas as pd

from libs.excelSaver import save

df = pd.read_json("./result.json")
print(df.count())

# Most expensive product first, cheapest product last.
df = df.sort_values(['price'], ascending=False)
print(df.columns)
print(df['name'])

# Maeil Dairies - competitor analysis: are we doing our marketing well?
# Exercise: extract only the rows whose name contains a given word
# (search hint: "how to filter include some word pandas").
# A comma was missing between "라떼" and "콜라"; Python silently joined the two
# adjacent literals into the single term "라떼콜라", so neither drink was
# searched for on its own.
drinks = ["우유", "두유", "커피", "라떼", "콜라", "오렌지", "수"]

for drink in drinks:
    # na=False treats a missing name as "no match" so the mask stays boolean.
    resultDf = df[df['name'].str.contains(drink, na=False)]
    save(resultDf, drink + ".xlsx")
    print(resultDf.count())
import pandas as pd

from libs.excelSaver import save

# Do not truncate long cell values when printing. None is the supported
# "no limit" value; the old -1 sentinel was deprecated in pandas 1.0 and is
# no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)

df = pd.read_json("./195297.json")

brands = ["킨더", "페레로로쉐", "벨지안", "엠엔앰", "누텔라", "마켓오", "롯데"]

# NOTE(review): brands[:1] limits the run to the first brand only; this looks
# like a debugging leftover - confirm before widening to the full list.
for brandName in brands[:1]:
    # na=False treats a missing name as "no match" so the mask stays boolean.
    dfFiltered = df[df['name'].str.contains(brandName, na=False)]

    dfSortedDesc = dfFiltered.sort_values(['price'], ascending=False)
    dfSortedAsc = dfFiltered.sort_values(['price'], ascending=True)
    dfPriceUpper = dfSortedDesc[dfSortedDesc['price'] >= 10000]

    dictList = [
        # price top 10
        {'df': dfSortedDesc.head(10), 'sheetName': 'top10'},
        # price top 10 counted from the bottom
        {'df': dfSortedAsc.head(10), 'sheetName': 'tail10'},
        # rows priced at 10000 or more
        {'df': dfPriceUpper, 'sheetName': 'upper10000'},
    ]
    save(dictList, brandName + ".xlsx")
import pandas as pd

from libs.excelSaver import save

# Do not truncate long cell values when printing. None is the supported
# "no limit" value; the old -1 sentinel was deprecated in pandas 1.0 and is
# no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)

df = pd.read_json("./195297.json")

# Write the whole frame out under the sheet name 'total'.
dicts = [
    {'df': df, 'sheetName': 'total'},
]
save(dicts, "초콜렛.xlsx")
print(df.count())
# NOTE(review): the `def` line and the `reportTable = []` initialisation were
# not present in the visible source; they are reconstructed here from the call
# `getReportTable(competitors)` below - confirm against the original script.
def getReportTable(competitors):
    """Build one price-summary row per competitor.

    For each entry in *competitors*, select the rows of the module-level
    ``df`` whose ``name`` contains that entry and summarise their ``price``
    column.

    Args:
        competitors: iterable of strings passed to ``str.contains``.

    Returns:
        A list of dicts with the keys ``competitor``, ``sum``, ``mean``,
        ``std``, ``max``, ``min`` and ``count``.
    """
    reportTable = []
    for competitor in competitors:
        # na=False treats a missing name as "no match" so the mask stays boolean.
        result = df[df['name'].str.contains(competitor, na=False)]
        prices = result['price']

        # Sum, mean and standard deviation, plus range and row count.
        # "count" previously repeated .min(), and "std" was computed but
        # never put in the row. Values are read straight into the dict so
        # the builtins sum/max/min are no longer shadowed.
        row = {
            "competitor": competitor,
            "sum": prices.sum(),
            "mean": prices.mean(),
            "std": prices.std(),
            "max": prices.max(),
            "min": prices.min(),
            "count": prices.count(),
        }
        reportTable.append(row)
    return reportTable


# Build the report as a list of dicts (JSON-like), then save it as an Excel file.
reportTable = getReportTable(competitors)
dfReportTable = pd.DataFrame(reportTable)
save(dfReportTable, "week3Report.xlsx")