Example #1
    def show(self):
        pd = self.one_hot_code_for_categorical()
        print(pd.head())
        self.multivariance(['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars'])
        self.correlation('SalePrice')
        self.categorical('SalePrice', 'OverallQual')
        self.bivariance('SalePrice', 'GrLivArea')
        self.missdata()
        self.univariance("SalePrice")
Example #2
def get_head(pd, lines=-1):
    """
    Get .head() of the DataFrame.
    If the 'lines' parameter is equal to -1, use the default (usually 5);
    otherwise use it.
    Return type: DataFrame
    """
    if lines == -1:
        return pd.head()
    else:
        return pd.head(lines)
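A quick usage sketch for this helper (the toy DataFrame below is made up):

import pandas as pd

df = pd.DataFrame({"a": range(10), "b": range(10, 20)})
print(get_head(df))     # default: first 5 rows
print(get_head(df, 3))  # first 3 rows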
Example #3
import pandas

def get_only_head_and_tail(pd, lines=-1):
    """
    Get .head() + .tail() of the DataFrame.
    If the 'lines' parameter is equal to -1, use the default (usually 5);
    otherwise split 'lines' between head and tail.
    Return type: DataFrame
    """
    # DataFrame.append was removed in pandas 2.0; concatenate instead.
    # (the parameter shadows the usual 'pd' alias, hence the full module name)
    if lines == -1:
        return pandas.concat([pd.head(), pd.tail()])
    else:
        lines = int(lines / 2)
        return pandas.concat([pd.head(lines), pd.tail(lines)])
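The same kind of smoke test for this helper (again with a made-up frame):

import pandas as pd

df = pd.DataFrame({"a": range(10)})
print(get_only_head_and_tail(df))     # first 5 + last 5 rows
print(get_only_head_and_tail(df, 4))  # first 2 + last 2 rows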

Example #4
    def get_data(interval, symbols, time):
        url = 'https://api.iextrading.com/1.0/stock/market/batch?symbols='
        urlTypes = 'types=chart&'
        #symbols = "AAPL,F,GE,SPY,FB&"
        urlRange = 'range=1d&chartInterval=' + interval

        # symbols = 'MMM,ABT&'  # ,ABBV,ABMD,ACN,ATVI,ADBE,AMD,AAP,AES,AET,AMG,AFL,A,APD,AKAM,ALK,ALB,ARE,ALXN,ALGN,ALLE,AGN,ADS,LNT,ALL,GOOGL,GOOG,MO,AMZN,AEE,AAL,AEP,AXP,AIG,AMT,AWK,AMP,ABC,AME,AMGN,APH,APC,ADI,ANSS,ANTM,AON,AOS,APA,AIV,AAPL,AMAT,APTV,ADM,ARNC,ANET,AJG,AIZ,T,ADSK,ADP,AZO,AVB,AVY,BHGE,BLL,BAC,BK,BAX,BBT,BDX,BRK-B,BBY,BIIB,BLK,HRB,BA,BKNG,BWA,BXP,BSX,BHF,BMY,AVGO,BR,BF-B,CHRW,COG,CDNS,CPB,COF,CAH,KMX,CCL,CAT,CBOE,CBRE,CBS,CELG&'

        r = DailyData.requests.get(url + symbols + urlTypes + urlRange)
        json = r.json()
        columns = [
            'date', 'symbol', 'open', 'high', 'low', 'close', 'volume',
            'change', 'changePercent', 'vwap'
        ]
        pd = DailyData.pd.DataFrame(columns=columns)

        for stock in json:
            print(stock)
            for chart in json[stock]['chart']:
                if chart['minute'] == time:
                    pd = DailyData.map_to_chart(chart, pd, stock)

        print(pd.head())
        print(pd.tail())
        pd.to_pickle("StockDataDaily/" + stock + ".pkl")  # note: 'stock' here is whichever symbol the loop ended on
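DailyData.map_to_chart is not shown in this snippet; a minimal sketch of what it might look like, inferred from how it is called above (an assumption, not the source's implementation):

import pandas

def map_to_chart(chart, pd, stock):
    # copy one interval bar into a single-row frame and append it to the accumulator
    row = {col: chart.get(col) for col in
           ['date', 'open', 'high', 'low', 'close', 'volume',
            'change', 'changePercent', 'vwap']}
    row['symbol'] = stock
    return pandas.concat([pd, pandas.DataFrame([row])], ignore_index=True)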
Example #5
import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# load file
path = './data/2020_2019.csv'
pd = pd.read_csv(path)  # note: this rebinds 'pd' from the pandas module to the DataFrame
print(pd.head())

Example #6
import pandas as pd
import seaborn as sns
    names=["interactions_ID"])
hippo_all_sig_interactions = pd.read_table(
    "C:\\Users\\libin\\UCSF\\eQTL\\hippocampus_lh_interactions",
    sep="\t",
    names=["chr", "start", "end", "score", "interactions_ID"])
hippo_self_single_end_sig = pd.merge(hippo_self_single_end_sig_ID,
                                     hippo_all_sig_interactions,
                                     on=["interactions_ID"],
                                     how="inner")
sns.violinplot(hippo_self_single_end_sig).set(xlim=(3, 20))
sns.violinplot(hippo_self_single_end_sig["score"]).set(xlim=(3, 20))
print(hippo_self_single_end_sig["score"].mean())
print(hippo_self_single_end_sig["score"].median())
hippo_self_single_end_sig_score = pd.read_table(
    "C:\\Users\\libin\\UCSF\\eQTL\\hippocampus_intersect_sig_single_end.score",
    sep="\t",
    names=["score"])
sns.violinplot(hippo_self_single_end_sig_score["score"]).set(xlim=(3, 20))
print(hippo_self_single_end_sig_score["score"].median())
print(hippo_self_single_end_sig_score["score"].mean())
hippocampus_sig_pairs = pd.read_table(
    "C:\\Users\\libin\\UCSF\\eQTL\\Brain_Hippocampus.v7.signif_variant_gene_pairs.txt",
    sep="\t")
sns.distplot(hippocampus_sig_pairs["score"])
sns.distplot(hippocampus_sig_pairs["tss_distance"])
print(hippocampus_sig_pairs["tss_distance"].max())
print(hippocampus_sig_pairs["tss_distance"].min())
print(hippocampus_sig_pairs.head())  # head() is a DataFrame method, not pd.head(frame)
print(hippocampus_sig_pairs.head(n=2))

sns.boxplot(x="variable", y="value", data=hippo_plot1.melt(), palette="Set2")
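sns.distplot is deprecated in recent seaborn releases; a modern equivalent of the two distribution plots above (same frame, histplot in place of distplot):

sns.histplot(hippocampus_sig_pairs["score"], kde=True)
sns.histplot(hippocampus_sig_pairs["tss_distance"], kde=True)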
Example #7
# Using the pandas library for data extraction and parsing.
# https://pandas.pydata.org/docs/

import pandas as pd
# Currently using data from Yahoo Finance
CSV_URL = 'https://query1.finance.yahoo.com/v7/finance/download/TSLA?period1=1558522033&period2=1590144433&interval=1d&events=history'
pd = pd.read_csv(CSV_URL)  # read the CSV (this rebinds 'pd' from the module to the DataFrame)

print(pd.head())  # print the first rows (default 5) just to debug

pd.to_csv('TEST.csv')  # save it as TEST.csv
Example #8
# Append a row
#df = pd.concat([df, df2])  # DataFrame.append was removed in pandas 2.0
# Drop a row
# Drop rows with label 0
#df = df.drop(0)

# Basic attributes and methods
print(pd.T)
print(pd.axes)
print(pd.dtypes)  # (.dtype if pd is a Series)
print(pd.empty)
print(pd.ndim)
print(pd.shape)
print(pd.size)
print(pd.values)
print(pd.head())
print(pd.tail())

# Descriptive statistics
#df.sum()
#df.mean()
#df.std()
#count()
#median()
#mode()
#min()
#max()
#abs()
#prod()
#cumsum()
#cumprod()
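As a quick illustration, running a few of the descriptive statistics listed above on a toy frame (values made up):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
print(df.sum())          # column-wise sums
print(df.mean())         # column-wise means
print(df.std())          # column-wise standard deviations
print(df["a"].cumsum())  # running total of one column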
Example #9
import pandas as pd
nbaReader = pd.read_csv('seasonstats.csv')
print(nbaReader)
nbaheadReader = nbaReader.head()  # head() is a DataFrame method, not pd.head()
print(nbaheadReader)
nbaReader.info()  # info() prints its summary directly and returns None
Example #10
import json

import pandas as pd
import requests

def fetch_reviews(movies, row):
    imdbID = movies.iloc[row].imdb_id  # 'imdb_id' is a hypothetical column; the original assignment is not shown
    imdbID = imdbID[-7:]
    api_key = 'wh7z4wpjqbjm9v7u2xsfcu6f'
    url = 'http://api.rottentomatoes.com/api/public/v1.0/movie_alias.json'
    options = {'id': imdbID, 'type': 'imdb', 'apikey': api_key}
    data = requests.get(url, params=options).text
    data = json.loads(data)
    movie_id = data.get('id', 0)  # fall back to 0 so the check below can fail cleanly
    print(movie_id)
    if movie_id > 0:
        url = 'http://api.rottentomatoes.com/api/public/v1.0/movies/%s/reviews.json' % movie_id
        options = {'review_type': 'top_critic', 'page_limit': 20, 'page': 1, 'apikey': api_key}
        data = requests.get(url, params=options).text
        data = json.loads(data)
        frame = pd.DataFrame(data['reviews'])
        frame.drop(['links', 'original_score'], inplace=True, axis=1)
        frame.rename(columns={'freshness': 'fresh', 'date': 'review_date'}, inplace=True)
        frame['imdb'] = imdbID
        frame['rtid'] = movie_id
        frame['title'] = movies.iloc[row].title  # .irow() was removed from pandas; use .iloc
        return frame
        #print(frame.head())
    else:
        return None

rows = 30

dfs = [fetch_reviews(movies, r) for r in range(rows)]
pd = pd.concat(dfs, ignore_index=True)  # note: rebinds 'pd' from the module to the combined DataFrame
print(pd.head())
Example #11
import pandas as pd

data = pd.read_csv("data.csv")
pd.head()
Example #12
import pandas as pd

def dailypreps():
    data = pd.read_csv('data/chargers/Allschwile_1.csv')
    print(data.head())  # head() is a DataFrame method
Example #13
import os
import sqlite3

import pandas as pd
from sqlalchemy import create_engine

# set working directory
os.chdir('/Users/a.a.gonzalez.paje/Box Sync/Alberto/rapid_intel/3.0/code')

# create database and a table
data_base = sqlite3.connect("database.db")

# uncomment the following line in case you want to create a dummy table
#data_base.execute('create table person (firstname text, secondname text, age int)')

print(os.getcwd())

# Import data (CSV format)
input_data = pd.read_csv("data/diamonds.csv")

# print the first three rows
print(input_data.head(3))

# Save dataframe into db
# Create your connection.
cnx = sqlite3.connect(':memory:')

# save data into db (pandas.io.sql.write_frame was removed; to_sql is the current API)
input_data.to_sql('diamonds', cnx, index=False)

# Get the dataframe back from the db
p2 = pd.read_sql('select * from diamonds', cnx)

print(p2.head())  # head() is a method of the DataFrame
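create_engine is imported above but never used; if the target were a SQLAlchemy engine rather than a raw sqlite3 connection, the same round trip might look like this (a sketch, not from the source):

engine = create_engine('sqlite:///database.db')
input_data.to_sql('diamonds', engine, index=False, if_exists='replace')
p2 = pd.read_sql('select * from diamonds', engine)
print(p2.head())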
Example #14
# TODO: multi-word entries (e.g. あきれる た
def get_sentidic_score(headword, *, type=None):
    db = get_db(POLITELY_DICT_DB)
    # TODO: type
    res = db[SENTIDIC_COLLECTION_NAME].find_one({'headword': headword})
    logger.info(res)
    score = 0
    if res:
        score = res['score']
    return score
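get_sentidic_score assumes dictionary documents shaped like {'headword': ..., 'score': ...}; seeding one entry for a smoke test might look like this (the score value is hypothetical):

db = get_db(POLITELY_DICT_DB)
db[SENTIDIC_COLLECTION_NAME].insert_one({'headword': '無い', 'score': -1})  # hypothetical score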


if __name__ == "__main__":
    pd = load_politly_dic(SENTIDIC_COLLECTION_NAME)
    print(pd.shape, pd.head(10))

    # db[NPTABLE_COLLECTION_NAME].update({'headword':'優れる', 'POS_jp':'動詞' },
    #                                    {'$set':{'headword':'優れる', 'reading': 'すぐれる', 'POS':'VERB', 'POS_jp':'動詞', 'eng':'be excellent'}},
    #                                    upsert=True)

    senti_file_noun = app_home + '/dataset/posneg_dic/sent_nouns.dic'
    # init_senti_dic(senti_file_noun, 'NOUN')
    senti_file_verb = app_home + '/dataset/posneg_dic/sent_verb_adj.dic'
    # get_sentidic_score('ない')
    # get_sentidic_score('合う')
    # get_sentidic_score('おいしい')
    get_sentidic_score('無い')
    get_sentidic_score('無駄')

    # init_senti_dic(senti_file_verb, 'VERB')
Example #15
    def __init__(self, pd):
        print(pd.head())
Example #16
2. Columns and rows have to be referenced by number, and switching back and forth between column names and column indices gets confusing.

In the next few tasks we will get to know the pandas library, one of the most popular data-analysis libraries. pandas is built on top of NumPy, but it addresses NumPy's limitations far better.
'''
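A tiny illustration of the numeric-only indexing limitation described above (array values made up):

import numpy as np
import pandas as pd

arr = np.array([[1, 2], [3, 4]])
print(arr[:, 1])      # NumPy: columns can only be addressed by position

df = pd.DataFrame(arr, columns=["a", "b"])
print(df["b"])        # pandas: the same column, addressed by name
print(df.iloc[:, 1])  # ...or still by position when needed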

## 3. Read in a CSV file ##
'''
### This section covers reading a CSV file
'''
import pandas as pd

food_info = pd.read_csv('food_info.csv')
print(food_info, type(food_info))

## 4. Exploring the DataFrame ##
'''
### This section covers the most basic pandas methods and attributes
For example:
pd.head()
pd.columns
pd.shape
dimensions = pd.shape
rows = dimensions[0]
columns = dimensions[1]
'''

print(food_info.head(3))
dimensions = food_info.shape
print(dimensions)
num_rows = dimensions[0]
print(num_rows)
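The docstring above also pulls the column count out of .shape; completing that walk-through:

num_columns = dimensions[1]
print(num_columns)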