def show(self):
    pd = self.one_hot_code_for_categorical()
    print(pd.head())
    self.multivariance(['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars'])
    self.correlation('SalePrice')
    self.categorical('SalePrice', 'OverallQual')
    self.bivariance('SalePrice', 'GrLivArea')
    self.missdata()
    self.univariance("SalePrice")
def get_head(pd, lines=-1):
    """Get .head() of the DataFrame.

    If 'lines' is equal to -1, use the default (usually 5); otherwise use it.
    Return type: DataFrame
    """
    if lines == -1:
        return pd.head()
    else:
        return pd.head(lines)
def get_only_head_and_tail(pd, lines=-1):
    """Get .head() + .tail() of the DataFrame.

    If 'lines' is equal to -1, use the default (usually 5); otherwise use it.
    Return type: DataFrame
    """
    # Note: DataFrame.append() was removed in pandas 2.0; pandas.concat is the replacement there.
    if lines == -1:
        return pd.head().append(pd.tail())
    else:
        lines = int(lines / 2)
        return pd.head(lines).append(pd.tail(lines))
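A quick usage sketch for the two helpers above (not from the original source; assumes pandas < 2.0, where DataFrame.append() still exists, and a throwaway DataFrame named df):

import pandas as pd

df = pd.DataFrame({"a": range(10), "b": range(10, 20)})
print(get_head(df))                   # default: first 5 rows
print(get_head(df, lines=3))          # first 3 rows
print(get_only_head_and_tail(df, 4))  # first 2 rows + last 2 rows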
def get_data(interval, symbols, time):
    url = 'https://api.iextrading.com/1.0/stock/market/batch?symbols='
    urlTypes = 'types=chart&'
    # symbols = "AAPL,F,GE,SPY,FB&"
    urlRange = 'range=1d&chartInterval=' + interval
    # symbols = 'MMM,ABT&'
    # ,ABBV,ABMD,ACN,ATVI,ADBE,AMD,AAP,AES,AET,AMG,AFL,A,APD,AKAM,ALK,ALB,ARE,ALXN,ALGN,ALLE,AGN,ADS,LNT,ALL,GOOGL,GOOG,MO,AMZN,AEE,AAL,AEP,AXP,AIG,AMT,AWK,AMP,ABC,AME,AMGN,APH,APC,ADI,ANSS,ANTM,AON,AOS,APA,AIV,AAPL,AMAT,APTV,ADM,ARNC,ANET,AJG,AIZ,T,ADSK,ADP,AZO,AVB,AVY,BHGE,BLL,BAC,BK,BAX,BBT,BDX,BRK-B,BBY,BIIB,BLK,HRB,BA,BKNG,BWA,BXP,BSX,BHF,BMY,AVGO,BR,BF-B,CHRW,COG,CDNS,CPB,COF,CAH,KMX,CCL,CAT,CBOE,CBRE,CBS,CELG&'
    r = DailyData.requests.get(url + symbols + urlTypes + urlRange)
    json = r.json()
    columns = ['date', 'symbol', 'open', 'high', 'low', 'close',
               'volume', 'change', 'changePercent', 'vwap']
    pd = DailyData.pd.DataFrame(columns=columns)
    for stock in json:
        print(stock)
        for chart in json[stock]['chart']:
            if chart['minute'] == time:
                pd = DailyData.map_to_chart(chart, pd, stock)
        print(pd.head())
        print(pd.tail())
        pd.to_pickle("StockDataDaily/" + stock + ".pkl")
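A hedged usage sketch for get_data. This is illustrative only: the free api.iextrading.com endpoint has since been retired, and every argument value below is made up rather than taken from the original code.

# interval appears to be the chart interval (passed straight into 'chartInterval='),
# symbols a comma-separated ticker list ending in '&' as in the commented examples,
# and time the per-minute label to keep. All values here are hypothetical.
# DailyData.get_data(interval='5', symbols='AAPL,MSFT&', time='09:35')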
import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# load file
path = './data/2020_2019.csv'
pd = pd.read_csv(path)  # note: this rebinds 'pd' to the DataFrame, shadowing the pandas module
print(pd.head())
names=["interactions_ID"]) hippo_all_sig_interactions = pd.read_table( "C:\\Users\\libin\\UCSF\\eQTL\\hippocampus_lh_interactions", sep="\t", names=["chr", "start", "end", "score", "interactions_ID"]) hippo_self_single_end_sig = pd.merge(hippo_self_single_end_sig_ID, hippo_all_sig_interactions, on=["interactions_ID"], how="inner") sns.violinplot(hippo_self_single_end_sig).set(xlim=(3, 20)) sns.violinplot(hippo_self_single_end_sig["score"]).set(xlim=(3, 20)) print(hippo_self_single_end_sig["score"].mean()) print(hippo_self_single_end_sig["score"].median()) hippo_self_single_end_sig_score = pd.read_table( "C:\\Users\\libin\\UCSF\\eQTL\\hippocampus_intersect_sig_single_end.score", sep="\t", names=["score"]) sns.violinplot(hippo_self_single_end_sig_score["score"]).set(xlim=(3, 20)) print(hippo_self_single_end_sig_score["score"].median()) print(hippo_self_single_end_sig_score["score"].mean()) hippocampus_sig_pairs = pd.read_table( "C:\\Users\\libin\\UCSF\\eQTL\\Brain_Hippocampus.v7.signif_variant_gene_pairs.txt", sep="\t") sns.distplot(hippocampus_sig_pairs["score"]) sns.distplot(hippocampus_sig_pairs["tss_distance"]) print(hippocampus_sig_pairs["tss_distance"].max()) print(hippocampus_sig_pairs["tss_distance"].min()) pd.head(hippocampus_sig_pairs) hippocampus_sig_pairs.head(n=2) sns.boxplot(x="variable", y="value", data=hippo_plot1.melt(), palette="Set2")
# Using the pandas library for data extraction and parsing.
# https://pandas.pydata.org/docs/
import pandas as pd
# import panda as pd

# Currently using data from Yahoo
CSV_URL = 'https://query1.finance.yahoo.com/v7/finance/download/TSLA?period1=1558522033&period2=1590144433&interval=1d&events=history'
pd = pd.read_csv(CSV_URL)  # read the CSV (rebinds 'pd' to the DataFrame)
pd.head()                  # first rows, just to debug (head() defaults to 5 rows)
pd.to_csv('TEST.csv')      # save it as TEST.csv
# Append rows
# df = df.append(df2)

# Drop rows: drop rows with label 0
# df = df.drop(0)

# Basic attributes and methods
print(pd.T)
print(pd.axes)
print(pd.dtypes)   # was pd.dtype; DataFrames expose .dtypes
print(pd.empty)
print(pd.ndim)
print(pd.shape)
print(pd.size)
print(pd.values)
print(pd.head())
print(pd.tail())

# Descriptive statistics
# df.sum()
# df.mean()
# df.std()
# count(), median(), mode(), min(), max(), abs(), prod(), cumsum(), cumprod()
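A minimal sketch of the descriptive-statistics calls listed above, using a small throwaway DataFrame (df and its values are made up for illustration, not taken from the original code):

import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [10.0, -5.5, 7.25, 3.0]})
print(df.sum())       # column-wise sums
print(df.mean())      # column-wise means
print(df.std())       # column-wise standard deviations
print(df.median())
print(df.abs())       # element-wise absolute values
print(df.cumsum())    # running totals down each column
print(df.describe())  # count/mean/std/min/quartiles/max in one table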
import pandas as pd

nbaReader = pd.read_csv('seasonstats.csv')
print(nbaReader)
nbaheadReader = nbaReader.head()  # was pd.head(); head() is a DataFrame method
nbainfoReader = nbaReader.info()  # was pd.info(); note info() prints and returns None
def fetch_reviews(movies, row):
    # (earlier lines of this function, which first derive imdbID, are not shown in the source)
    imdbID = imdbID[-7:]
    api_key = 'wh7z4wpjqbjm9v7u2xsfcu6f'
    url = 'http://api.rottentomatoes.com/api/public/v1.0/movie_alias.json'
    options = {'id': imdbID, 'type': 'imdb', 'apikey': api_key}
    data = requests.get(url, params=options).text
    data = json.loads(data)
    movie_id = 0
    movie_id = data['id']
    print(movie_id)
    if movie_id > 0:
        url = 'http://api.rottentomatoes.com/api/public/v1.0/movies/%s/reviews.json' % movie_id
        options = {'review_type': 'top_critic', 'page_limit': 20, 'page': 1, 'apikey': api_key}
        data = requests.get(url, params=options).text
        data = json.loads(data)
        frame = pd.DataFrame(data['reviews'])
        frame.drop(['links', 'original_score'], inplace=True, axis=1)
        frame.rename(columns={'freshness': 'fresh', 'date': 'review_date'}, inplace=True)
        frame['imdb'] = imdbID
        frame['rtid'] = movie_id
        frame['title'] = movies.irow(row).title  # irow() is long-deprecated; modern pandas uses .iloc[row]
        return frame
        # print(frame.head())
    else:
        return None

rows = 30
dfs = [fetch_reviews(movies, r) for r in range(rows)]
pd = pd.concat(dfs, ignore_index=True)  # rebinds 'pd' to the combined DataFrame
print(pd.head())
import pandas as pd

data = pd.read_csv("data.csv")
data.head()  # was pd.head(); head() is a DataFrame method
import pandas as pd


def dailypreps():
    data = pd.read_csv('data/chargers/Allschwile_1.csv')
    print(data.head())  # was pd.head(data); head() is a DataFrame method
import os
import sqlite3

import pandas as pd
import pandas.io.sql as sql
from sqlalchemy import create_engine

# set working directory
os.chdir('/Users/a.a.gonzalez.paje/Box Sync/Alberto/rapid_intel/3.0/code')

# create database and a table
data_base = sqlite3.connect("database.db")
# uncomment the following line in case you want to create a dummy table
# db.execute('create table person (firstname text, secondname text, age int)')
print(os.getcwd())

# Import data (CSV format)
input_data = pd.read_csv("data/diamonds.csv")
# print file headers
input_data.head(3)

# Save dataframe into db
# Create your connection.
cnx = sqlite3.connect(':memory:')
# save data into db
sql.write_frame(input_data, name='diamonds', con=cnx)  # legacy pandas.io.sql API

# Get the dataframe back from the db
p2 = sql.read_frame('select * from diamonds', cnx)     # legacy pandas.io.sql API
p2.head()  # was pd.head(p2); head() is a DataFrame method
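Note that sql.write_frame and sql.read_frame were removed from pandas long ago. A minimal sketch of the same round trip with the current API, assuming the same file, table name, and in-memory SQLite connection:

import sqlite3

import pandas as pd

cnx = sqlite3.connect(':memory:')
input_data = pd.read_csv("data/diamonds.csv")

# save the DataFrame into a 'diamonds' table
input_data.to_sql('diamonds', cnx, index=False, if_exists='replace')

# read it back into a new DataFrame
p2 = pd.read_sql('select * from diamonds', cnx)
print(p2.head())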
# TODO: multi-word entries (e.g. あきれる た)
def get_sentidic_score(headword, *, type=None):
    db = get_db(POLITELY_DICT_DB)
    # TODO: type
    res = db[SENTIDIC_COLLECTION_NAME].find_one({'headword': headword})
    logger.info(res)
    score = 0
    if res:
        score = res['score']
    return score


if __name__ == "__main__":
    pd = load_politly_dic(SENTIDIC_COLLECTION_NAME)
    print(pd.shape, pd.head(10))
    # db[NPTABLE_COLLECTION_NAME].update({'headword': '優れる', 'POS_jp': '動詞'},
    #                                    {'$set': {'headword': '優れる', 'reading': 'すぐれる', 'POS': 'VERB',
    #                                              'POS_jp': '動詞', 'eng': 'be excellent'}},
    #                                    upsert=True)
    senti_file_noun = app_home + '/dataset/posneg_dic/sent_nouns.dic'
    # init_senti_dic(senti_file_noun, 'NOUN')
    senti_file_verb = app_home + '/dataset/posneg_dic/sent_verb_adj.dic'
    # get_sentidic_score('ない')
    # get_sentidic_score('合う')
    # get_sentidic_score('おいしい')
    get_sentidic_score('無い')
    get_sentidic_score('無駄')
    # init_senti_dic(senti_file_verb, 'VERB')
def __init__(self, pd):
    print(pd.head())
2. Columns and rows must be referenced by number, which gets confusing when you
   go back and forth between column names and column indices.

Over the next few tasks we will look at the Pandas library, one of the most
popular data-analysis libraries. Pandas is built on top of NumPy but addresses
NumPy's limitations much better.
'''

## 3. Read in a CSV file ##

'''
### This section covers: reading a CSV file with pandas
'''
import pandas as pd

food_info = pd.read_csv('food_info.csv')
print(food_info, type(food_info))

## 4. Exploring the DataFrame ##

'''
### This section covers: the most basic DataFrame methods and attributes, e.g.:
    pd.head()
    pd.columns
    pd.shape
    dimensions = pd.shape
    rows = dimensions[0]
    columns = dimensions[1]
'''
print(food_info.head(3))
dimensions = food_info.shape
print(dimensions)
num_rows = dimensions[0]
print(num_rows)
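A short sketch of the label-based access that these notes contrast with NumPy's purely positional indexing. The column names 'NDB_No' and 'Energ_Kcal' are assumptions for illustration and may not match the actual food_info.csv:

# Select by label instead of remembering column positions
first_row = food_info.loc[0]                            # row by index label
calories = food_info["Energ_Kcal"]                      # column by name (assumed column)
subset = food_info.loc[0:4, ["NDB_No", "Energ_Kcal"]]   # rows 0-4, two named columns
print(subset)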