Example #1
    def cleaned_maturity_data(self, expiry):
        # select the option chain for the requested expiry
        filteredData = self.optionData
        data = filteredData.loc[(slice(None), expiry, slice(None)), :]

        # out-of-the-money puts: strikes at or below the underlying price
        otmPut = data.loc[slice(None, self.underlying_price), ["Last"]]
        times = 0

        # walk downward from the strikes nearest the underlying; once more
        # than two quotes have been seen at the 0.01 price floor, drop every
        # remaining deeper out-of-the-money row
        for index in range(len(otmPut) - 1, -1, -1):
            row = otmPut.iloc[index]
            if row.Last == 0.01:
                times += 1
            if times > 2:
                data.drop(row.name, inplace=True)

        # same trimming for out-of-the-money calls, walking upward
        otmCall = data.loc[slice(self.underlying_price, None), ["Last"]]
        times = 0

        for index in range(len(otmCall)):
            row = otmCall.iloc[index]
            if row.Last == 0.01:
                times += 1
            if times > 2:
                data.drop(row.name, inplace=True)

        # keep only rows that still have a positive last price
        return data.loc[data["Last"] > 0]
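The trimming above relies on DataFrame.drop with an index label and inplace=True. A minimal self-contained sketch of the same idea on a toy single-index frame (values and labels are made up):

import pandas as pd

df = pd.DataFrame({"Last": [0.01, 0.01, 0.01, 2.5, 3.0]},
                  index=["k1", "k2", "k3", "k4", "k5"])
times = 0
for label, row in list(df.iterrows()):   # list() snapshots, so dropping is safe
    if row["Last"] == 0.01:
        times += 1
    if times > 2:
        df.drop(label, inplace=True)     # drop the row by its index label
print(df)                                # only k1 and k2 survive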
Example #2
def subsample_data(filename_data, filename_symbology, dir_pickle, start_date,
                   end_date, query_attribute, query_criteria, include_avg):
    query_criteria_filename = '-'.join(query_criteria[:3])
    pickle_name = (dir_pickle + 'pickle_sentiment_' + start_date + '_' + end_date +
                   '_' + query_attribute + '_' + query_criteria_filename + '_' +
                   str(include_avg) + '.p')
    # try to read from a previously created pickle first
    try:
        data = pd.read_pickle(pickle_name)
        print("Loaded from pre-created pickle")
    except Exception:
        print("Subsampling data from csv")
        # read csv
        data = pd.read_csv(filename_data)
        # merge with symbology csv for additional info
        data_symbology = pd.read_csv(filename_symbology)
        # convert headers to uppercase for ease of use
        data_symbology.columns = [x.upper() for x in data_symbology.columns]
        data = pd.merge(data, data_symbology, on='SYMBOL', how='left')
        # perform filter query based on parameters
        data = data[data[query_attribute].isin(query_criteria)]
        # parse the UTC timestamps and truncate them to dates
        data['DATE'] = pd.to_datetime(data['TIMESTAMP_UTC'],
                                      format='%Y-%m-%dT%H:%M:%SZ').dt.normalize()
        # query between start and end date
        data = data[(data['DATE'] >= start_date) & (data['DATE'] <= end_date)]
        # remove the AVG columns unless requested
        if not include_avg:
            avg_cols = [col for col in data.columns if 'AVG' in col]
            data.drop(avg_cols, inplace=True, axis=1)
        # save as pickle
        data.to_pickle(pickle_name)
    return data
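The try/except caching idiom above can be isolated into a small reusable helper; a minimal sketch (load_or_build and its arguments are hypothetical names):

import pandas as pd

def load_or_build(pickle_path, build):
    try:
        return pd.read_pickle(pickle_path)   # fast path: reuse the cached frame
    except FileNotFoundError:
        df = build()                         # slow path: recompute
        df.to_pickle(pickle_path)            # cache for next time
        return df

df = load_or_build('cache.p', lambda: pd.DataFrame({'a': [1, 2]}))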
Example #3
File: functions.py  Project: Anhmike/wmcm
import numpy as np


def get_returns(df):
    '''Computes log returns: close-to-close, open-to-close and close-to-open.'''

    # make a deep copy of the input data so we don't accidentally change it
    data = df.copy()

    data['lastClose'] = data['close'].shift(1)
    data['ret_cc'] = np.log(data['close'] / data['lastClose'])  # close-to-close return
    data['ret_oc'] = np.log(data['close'] / data['open'])       # open-to-close return
    data['ret_co'] = np.log(data['open'] / data['lastClose'])   # close-to-open return

    # drop the helper column; it was only needed to compute the returns
    data.drop(columns='lastClose', inplace=True)

    return data
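A quick usage sketch with made-up prices:

import pandas as pd

prices = pd.DataFrame({'open': [10.0, 10.2, 10.1],
                       'close': [10.1, 10.3, 10.4]})
print(get_returns(prices)[['ret_cc', 'ret_oc', 'ret_co']])
# the first row's ret_cc and ret_co are NaN: there is no previous close yet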
Example #4
import datetime

import matplotlib.pyplot as plt
import torch


def stock_pic(data, time_interval, start_date, end_date, gradient_period):
    x, y = [], []
    print(data)
    trial_x, trial_y = 0, 0
    # locate the rows for end_date and start_date; if a timestamp is missing,
    # advance it minute by minute and remember how far it had to shift
    while len(x) == 0 and len(y) == 0:
        x = data[data['date'] == str(end_date)].index.values
        y = data[data['date'] == str(start_date)].index.values
        if len(x) == 0:
            end_date += datetime.timedelta(minutes=1)
            trial_x += 1
        if len(y) == 0:
            start_date += datetime.timedelta(minutes=1)
            trial_y += 1
        print(x, y)

    # undo the shifts so the indices refer to the originally requested
    # dates (assumes one row per minute)
    x, y = x - trial_x, y - trial_y
    print(x, y)

    # average of open and close, smoothed with an exponentially weighted mean
    data['6. AVG'] = (data['1. open'] + data['4. close']) / 2
    data['EWMA-AVG'] = data['6. AVG'].ewm(span=3).mean()
    gradient = (data['EWMA-AVG'][int(x)] -
                data['EWMA-AVG'][int(x) - gradient_period]) / gradient_period
    grad_label = torch.ones(1) if gradient > 0 else torch.zeros(1)

    # slice out the requested window and plot open, close and the smoothed
    # average without the volume column
    data = data[int(y):int(x)]
    data = data.drop(['5. volume'], axis=1)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    ax.plot(data['1. open'], c='g', alpha=1)
    ax.plot(data['4. close'], c='r', alpha=1)
    ax.plot(data['EWMA-AVG'], c='b', alpha=1)
    plt.axis('off')

    image = __ImageToTensor(fig)
    if torch.cuda.is_available():
        return image.cuda(), grad_label.cuda()
    return image, grad_label
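The smoothing step is plain pandas exponentially weighted averaging; in isolation:

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.ewm(span=3).mean())   # exponentially weighted moving average, span 3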
Example #5
import matplotlib.pyplot as plt
import torch


def stock_pic(data, time_interval, start_date, end_date, gradient_period):
    x = data[data['date'] == str(end_date)].index.values
    y = data[data['date'] == str(start_date)].index.values
    # if a date is missing the lookup returns an empty index; shift by one
    # as a (weak) fallback (len() avoids numpy's ambiguous truth value)
    if len(x) == 0:
        x = x + 1
    elif len(y) == 0:
        y = y + 1

    # average of open and close, smoothed with an exponentially weighted mean
    data['6. AVG'] = (data['1. open'] + data['4. close']) / 2
    data['EWMA-AVG'] = data['6. AVG'].ewm(span=3).mean()
    gradient = (data['EWMA-AVG'][int(x)] -
                data['EWMA-AVG'][int(x) - gradient_period]) / gradient_period
    grad_label = torch.ones(1) if gradient > 0 else torch.zeros(1)

    # slice from the start-date row to the end-date row (assumes the rows are
    # in chronological order), then plot without the volume column
    data = data[int(y):int(x)]
    data = data.drop(['5. volume'], axis=1)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    ax.plot(data['1. open'], c='g', alpha=1)
    ax.plot(data['4. close'], c='r', alpha=1)
    ax.plot(data['EWMA-AVG'], c='b', alpha=1)
    plt.axis('off')
    plt.show()

    image = __ImageToTensor(fig)
    if torch.cuda.is_available():
        return image.cuda(), grad_label.cuda()
    return image, grad_label
Example #6
def normalise_stock_data(data):

    # note: data_adj starts as an alias of the input frame, so the derived
    # 'Ordinal/1e6' and 'Weekday' columns are written into `data` as well
    data_adj = data

    for i in range(0, data.index.shape[0]):
        data_adj.loc[data.index[i],
                     'Ordinal/1e6'] = data.index[i].to_pydatetime().toordinal() / 1e6
        data_adj.loc[data.index[i],
                     'Weekday'] = data.index[i].to_pydatetime().weekday()

    # drop the six raw OHLCV columns, keeping only the derived ones
    data_adj = data.drop(data.columns[[0, 1, 2, 3, 4, 5]], axis=1)

    # split/dividend adjustment ratio
    data_adj['Adj'] = data['Adj Close'] / data['Close']

    # scale volume by its maximum
    data_adj['Adj Volume'] = data['Volume']
    data_adj['Adj Volume'] /= np.max(data_adj['Adj Volume'])

    # rescale prices so the first adjusted close equals 1
    first_adj_close = data['Adj Close'].iloc[0]
    data_adj['Adj Close'] = data['Adj Close'] / first_adj_close
    data_adj['Adj Open'] = data['Open'] * data_adj['Adj'] / first_adj_close
    data_adj['Adj High'] = data['High'] * data_adj['Adj'] / first_adj_close
    data_adj['Adj Low'] = data['Low'] * data_adj['Adj'] / first_adj_close

    # one-step relative changes: today's value over yesterday's adjusted
    # close, shifted so that "no change" maps to 0
    data_adj.loc[data.index[0], 'Normalised Volume'] = 1
    data_adj.loc[data.index[1:], 'Normalised Volume'] = data_adj['Adj Volume'][
        1:] / data_adj['Adj Close'][:-1].values
    data_adj.loc[data.index, 'Normalised Volume'] -= 1

    data_adj.loc[data.index[0], 'Normalised Close'] = 1
    data_adj.loc[data.index[1:], 'Normalised Close'] = data_adj['Adj Close'][
        1:] / data_adj['Adj Close'][:-1].values
    data_adj.loc[data.index, 'Normalised Close'] -= 1

    data_adj.loc[data.index[0], 'Normalised Open'] = 1
    data_adj.loc[data.index[1:], 'Normalised Open'] = data_adj['Adj Open'][
        1:] / data_adj['Adj Close'][:-1].values
    data_adj.loc[data.index, 'Normalised Open'] -= 1

    data_adj.loc[data.index[0], 'Normalised High'] = 1
    data_adj.loc[data.index[1:], 'Normalised High'] = data_adj['Adj High'][
        1:] / data_adj['Adj Close'][:-1].values
    data_adj.loc[data.index, 'Normalised High'] -= 1

    data_adj.loc[data.index[0], 'Normalised Low'] = 1
    data_adj.loc[data.index[1:], 'Normalised Low'] = data_adj['Adj Low'][
        1:] / data_adj['Adj Close'][:-1].values
    data_adj.loc[data.index, 'Normalised Low'] -= 1

    # drop the helper adjustment ratio before returning
    data_adj = data_adj.drop(['Adj'], axis=1)

    return data_adj
Example #7
import pandas_datareader as pdr


def download_ohlc(sector_tickers, start, end):
    sector_ohlc = {}
    for sector, tickers in sector_tickers.items():
        print('Downloading data from Yahoo for %s sector' % sector)
        data = pdr.get_data_yahoo(tickers, start, end)
        # adjust open/high/low by the adjusted-to-raw close ratio
        for item in ['Open', 'High', 'Low']:
            data[item] = data[item] * data['Adj Close'] / data['Close']
        # rename to lowercase OHLCV column names
        data.rename(columns={
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Adj Close': 'close',
            'Volume': 'volume'
        }, inplace=True)
        data.drop(['Close'], axis=1, inplace=True)
        sector_ohlc[sector] = data
    print('Finished downloading data')
    return sector_ohlc
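The price adjustment inside the loop is just a rescaling by the adjusted-to-raw close ratio; a worked toy example (the numbers are made up):

import pandas as pd

bar = pd.DataFrame({'Open': [10.0], 'High': [11.0], 'Low': [9.5],
                    'Close': [10.5], 'Adj Close': [10.0]})
for item in ['Open', 'High', 'Low']:
    bar[item] = bar[item] * bar['Adj Close'] / bar['Close']
print(bar)   # Open becomes 10.0 * 10.0 / 10.5, roughly 9.52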
Example #8
    def __solver_fitted_iv_function(self, data, t, s):

        print("Solving fitted implied volatility function...")

        # squared relative error between the quoted IV and the SABR model IV
        def calculate_var(a, v, p, F, k, iv):
            result = self.calculate_sabr(a, v, p, F, k, t)
            var = ((iv - result) / iv)**2
            return var

        # objective: total squared relative error across all strikes
        def F(x):
            params = data
            a, v, p = x[0], x[1], x[2]

            params.loc[:, "Var"] = params.apply(
                lambda row: calculate_var(a, v, p, s, row.name, row.Iv),
                axis=1)

            return params.loc[:, "Var"].sum()

        # bounds: alpha > 0, 0 < nu <= sqrt(1/t), -1 < rho < 1
        bnds = ((0.000001, None), (0.000001, math.sqrt(1 / t)),
                (-0.999999, 0.999999))
        x = scipy.optimize.minimize(F, [0.5, 0.5, 0.5], bounds=bnds)

        # poor fit: drop the extreme IV quotes and retry on the reduced set
        if x.fun > 2.0:
            data.drop(data[data["Iv"] == min(data.Iv)].index, inplace=True)
            data.drop(data[data["Iv"] == max(data.Iv)].index, inplace=True)
            return self.__solver_fitted_iv_function(data, t, s)

        fitted_params = x.x

        print("Function solved. Function value: " + str(x.fun))
        return fitted_params
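The fit itself is a plain bounded minimisation; the same scipy.optimize.minimize call shape on a toy objective:

import scipy.optimize

def objective(x):
    # minimum at x = (1.0, -0.5)
    return (x[0] - 1.0) ** 2 + (x[1] + 0.5) ** 2

bnds = ((0.0, None), (-1.0, 1.0))            # per-parameter (lower, upper)
res = scipy.optimize.minimize(objective, [0.5, 0.5], bounds=bnds)
print(res.x, res.fun)                        # roughly [1.0, -0.5] and 0.0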
Example #9
import datetime

from pandas_datareader import data
from sklearn import linear_model

# List with stocks to analyze.
stocks = ['BCBA:BMA']

start = datetime.date(2017, 1, 1)
end = datetime.date(2017, 10, 6)

# Read OHLC data from Google Finance (the 'google' reader has since been
# retired from pandas-datareader; swap in a working source if running today)
data = data.DataReader(stocks[0], 'google', start, end)

# Linear regression for close price.
# Drop missing rows up front so the features and target stay aligned.
data = data.dropna()
X = data.drop(['Close'], axis=1)
y = data['Close']

# Split train and test sets: hold out the last 50 rows.
X_train = X[:-50]
X_test = X[-50:]
y_train = y[:-50]
y_test = y[-50:]

reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
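With the split kept aligned, the hold-out window can be scored directly (a usage sketch continuing the snippet above):

print(reg.score(X_test, y_test))   # R^2 on the last 50 sessions
print(reg.predict(X_test[:5]))     # predicted closes for the first held-out rows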
Example #10
def getTrendingNews(q1):

    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your other key, in case of exception')
    end = date.today()
    start = date.today() - relativedelta(days=+10)

    s1 = str(start).split(" ")[0]
    e1 = str(end).split(" ")[0]

    try:
        news = newsapi.get_everything(q=q1,
                                      from_param=s1,
                                      to=e1,
                                      language='en',
                                      sort_by='popularity',
                                      page_size=100,
                                      page=1)
        print(news, "1")

        # arrange the relevant fields of each article into rows
        news_data = []
        for article in news['articles']:
            news_data.append([
                article["publishedAt"], article["title"],
                article["description"], article["content"], article["url"],
                article["urlToImage"]
            ])

        nd = np.array(news_data)

        data = pd.DataFrame(nd,
                            columns=[
                                'Date', 'Title', 'Description', 'Content',
                                'URL', 'Image'
                            ])

        # Split each timestamp into separate date and time fields.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])

        data['Date'] = pd.to_datetime(data.Date)
        data = data.sort_values(by=['Date'], ascending=False)

        return data
    except Exception as e:
        print(e)

        try:
            newsapi = NewsApiClient(api_key='Your other key')
            news = newsapi.get_everything(q=q1,
                                          from_param=s1,
                                          to=e1,
                                          language='en',
                                          sort_by='popularity',
                                          page_size=100,
                                          page=1)
            print(news, "1")

            # arrange the relevant fields of each article into rows
            news_data = []
            for article in news['articles']:
                news_data.append([
                    article["publishedAt"], article["title"],
                    article["description"], article["content"],
                    article["url"], article["urlToImage"]
                ])

            nd = np.array(news_data)

            data = pd.DataFrame(nd,
                                columns=[
                                    'Date', 'Title', 'Description', 'Content',
                                    'URL', 'Image'
                                ])

            # Split each timestamp into separate date and time fields.
            new = data["Date"].str.split("T", n=1, expand=True)
            new1 = new[1].str.split("Z", n=1, expand=True)
            data = data.drop("Date", axis=1)
            data.insert(loc=0, column='Date', value=new[0])
            data.insert(loc=1, column='Time', value=new1[0])

            data['Date'] = pd.to_datetime(data.Date)
            data = data.sort_values(by=['Date'], ascending=False)

            return data
        except Exception:
            return []
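The date/time split used above works on any ISO-8601 'Z' timestamp column; in isolation:

import pandas as pd

ts = pd.Series(["2020-01-02T15:04:05Z"])
date_part = ts.str.split("T", n=1, expand=True)        # column 0: date, column 1: time + 'Z'
time_part = date_part[1].str.split("Z", n=1, expand=True)[0]
print(date_part[0][0], time_part[0])                   # 2020-01-02 15:04:05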
Example #11
def getWordCloud(q1):

    text = "Insert Your text here"
    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your other key, in case of exception')

    end = date.today()
    start = date.today() - relativedelta(days=+10)

    s1 = str(start).split(" ")[0]
    e1 = str(end).split(" ")[0]

    try:
        news = newsapi.get_everything(q=q1,
                                      from_param=s1,
                                      to=e1,
                                      language='en',
                                      sort_by='popularity',
                                      page_size=100,
                                      page=1)

        # arrange the relevant fields of each article into rows
        news_data = []
        for article in news['articles']:
            news_data.append([
                article["publishedAt"], article["title"],
                article["description"], article["content"], article["url"],
                article["urlToImage"]
            ])

        nd = np.array(news_data)

        data = pd.DataFrame(nd,
                            columns=[
                                'Date', 'Title', 'Description', 'Content',
                                'URL', 'Image'
                            ])

        # Split each timestamp into separate date and time fields.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])

        data['Date'] = pd.to_datetime(data.Date)
        # concatenate all descriptions published on the same day
        dff = data[['Date', 'Description']].groupby('Date').sum()

        # build the cloud text, appending repeated copies of the query string
        text = " ".join(list(dff['Description'])) + q1 * 15

        wave_mask = np.array(Image.open("assets/cloud.jpg"))
        wordcloud = WordCloud(mask=wave_mask,
                              width=512,
                              height=512,
                              colormap="Greens").generate(text)
        # random file name so the browser doesn't serve a stale cached image
        in1 = str(random.randint(0, 100000000000000))
        in2 = str(random.randint(0, 100000000000000))
        in3 = str(random.randint(0, 100000000000000))
        in4 = in1 + in2 + in3

        # clear out previously generated images, then save the new one
        shutil.rmtree('assets/images')
        os.mkdir("assets/images")
        wordcloud.to_file("assets/images/" + in4 + ".jpg")

        print("saved...")

        # base64-encode the saved image for embedding in the page
        with open("assets/images/" + in4 + ".jpg", "rb") as imageFile:
            img1 = base64.b64encode(imageFile.read()).decode("utf-8")

        print("Yes\n\n")
        return [img1]

    except Exception as e:
        print(e)

        try:
            newsapi = NewsApiClient(api_key='Your other key')
            news = newsapi.get_everything(q=q1,
                                          from_param=s1,
                                          to=e1,
                                          language='en',
                                          sort_by='popularity',
                                          page_size=100,
                                          page=1)
            print(news, "1")

            # arrange the relevant fields of each article into rows
            news_data = []
            for article in news['articles']:
                news_data.append([
                    article["publishedAt"], article["title"],
                    article["description"], article["content"],
                    article["url"], article["urlToImage"]
                ])

            nd = np.array(news_data)

            data = pd.DataFrame(nd,
                                columns=[
                                    'Date', 'Title', 'Description', 'Content',
                                    'URL', 'Image'
                                ])

            # Split each timestamp into separate date and time fields.
            new = data["Date"].str.split("T", n=1, expand=True)
            new1 = new[1].str.split("Z", n=1, expand=True)
            data = data.drop("Date", axis=1)
            data.insert(loc=0, column='Date', value=new[0])
            data.insert(loc=1, column='Time', value=new1[0])

            data['Date'] = pd.to_datetime(data.Date)
            # note: this fallback returns the sorted article frame rather
            # than a rendered word-cloud image like the primary path
            data = data.sort_values(by=['Date'], ascending=False)

            return data
        except Exception:
            return []
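Stripped of the news plumbing, the word-cloud step needs only a text blob (the mask and output path are optional; file names here are hypothetical):

from wordcloud import WordCloud

wc = WordCloud(width=512, height=512, colormap="Greens")
wc.generate("stocks rise stocks fall markets rally")   # build the cloud from raw text
wc.to_file("demo_cloud.png")                           # render it to an image file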
Example #12
def getNews(df, s1, e1, q1):

    try:
        newsapi = NewsApiClient(api_key='Your Key')
    except Exception:
        newsapi = NewsApiClient(api_key='Your other key, in case of exception')

    try:
        news = newsapi.get_everything(q=q1, from_param=s1, to=e1,
                                      language='en', sort_by='popularity',
                                      page_size=100, page=1)

        # arrange the relevant fields of each article into rows
        news_data = []
        for article in news['articles']:
            news_data.append([
                article["publishedAt"], article["title"],
                article["description"], article["content"], article["url"]
            ])

        nd = np.array(news_data)

        data = pd.DataFrame(nd, columns=['Date', 'Title', 'Description',
                                         'Content', 'URL'])


        # Split each timestamp into separate date and time fields.
        new = data["Date"].str.split("T", n=1, expand=True)
        new1 = new[1].str.split("Z", n=1, expand=True)
        data = data.drop("Date", axis=1)
        data.insert(loc=0, column='Date', value=new[0])
        data.insert(loc=1, column='Time', value=new1[0])

        data['Date'] = pd.to_datetime(data.Date)
        data = data.sort_values(by='Date')
        # concatenate all titles published on the same day
        dff = data[['Date', 'Title']].groupby('Date').sum()

        lTemp = {'Date': [], 'Title': []}

        # one row per trading day in df, defaulting to an empty title
        for i in range(len(df)):
            lTemp['Date'].append(df.index[i])
            lTemp['Title'].append(" ")

        # fill in the concatenated titles for days that had news
        for i in range(len(lTemp['Date'])):
            if lTemp['Date'][i] in list(dff.index):
                lTemp['Title'][i] = dff['Title'][list(dff.index).index(lTemp['Date'][i])]

        lTemp = pd.DataFrame(lTemp)

        print(lTemp.head())
        
        freq_Vec = createVector(lTemp)

        # pad or truncate the feature matrix to the 657 columns the model expects
        if freq_Vec.shape[1] > 657:
            freq_Vec = freq_Vec.tocsr()[:, 0:657]
        else:
            q = np.array(freq_Vec.todense())
            b = np.zeros((freq_Vec.shape[0], 657 - freq_Vec.shape[1]))
            p = np.concatenate((q, b), axis=1)
            freq_Vec = csr_matrix(p)

        # center the features
        X_test = freq_Vec.toarray()
        mean = np.mean(X_test)
        X_test -= mean
            
        # classify the day's news and scale the class-score differences by
        # the mid-range of the traded volume
        prediction = predict_news(X_test)
        ypred = prediction

        maxScore = max(list(df['Volume']))
        minScore = min(list(df['Volume']))
        finScore = (maxScore + minScore) // 2
        pol_score = []
        for i in range(len(ypred)):
            pol_score.append((ypred[i, 1] - ypred[i, 0]) * finScore)

        df['Polarity'] = pol_score

        return df
    except Exception as e:
        print(e)
        # default to zero polarity, then retry with the fallback key
        df['Polarity'] = [0 for i in range(len(df))]

        try:
            newsapi = NewsApiClient(api_key='Your other key')
            news = newsapi.get_everything(q=q1, from_param=s1, to=e1,
                                          language='en', sort_by='popularity',
                                          page_size=100, page=1)
            print(news, "1")

            # arrange the relevant fields of each article into rows
            news_data = []
            for article in news['articles']:
                news_data.append([
                    article["publishedAt"], article["title"],
                    article["description"], article["content"],
                    article["url"], article["urlToImage"]
                ])

            nd = np.array(news_data)

            data = pd.DataFrame(nd, columns=['Date', 'Title', 'Description',
                                             'Content', 'URL', 'Image'])


            # Split each timestamp into separate date and time fields.
            new = data["Date"].str.split("T", n=1, expand=True)
            new1 = new[1].str.split("Z", n=1, expand=True)
            data = data.drop("Date", axis=1)
            data.insert(loc=0, column='Date', value=new[0])
            data.insert(loc=1, column='Time', value=new1[0])

            data['Date'] = pd.to_datetime(data.Date)
            data = data.sort_values(by=['Date'])
            # concatenate all titles published on the same day
            dff = data[['Date', 'Title']].groupby('Date').sum()

            lTemp = {'Date': [], 'Title': []}

            for i in range(len(df)):
                lTemp['Date'].append(df.index[i])
                lTemp['Title'].append(" ")

            for i in range(len(lTemp['Date'])):
                if lTemp['Date'][i] in list(dff.index):
                    lTemp['Title'][i] = dff['Title'][list(dff.index).index(lTemp['Date'][i])]

            lTemp = pd.DataFrame(lTemp)

            print(lTemp.head())
            
            freq_Vec = createVector(lTemp)

            # pad or truncate the feature matrix to the 657 columns the model expects
            if freq_Vec.shape[1] > 657:
                freq_Vec = freq_Vec.tocsr()[:, 0:657]
            else:
                q = np.array(freq_Vec.todense())
                b = np.zeros((freq_Vec.shape[0], 657 - freq_Vec.shape[1]))
                p = np.concatenate((q, b), axis=1)
                freq_Vec = csr_matrix(p)

            X_test = freq_Vec.toarray()
            mean = np.mean(X_test)
            X_test -= mean


            # scale class-score differences by the mid-range of traded volume
            maxScore = max(list(df['Volume']))
            minScore = min(list(df['Volume']))
            finScore = (maxScore + minScore) // 2
            prediction = predict_news(X_test)
            ypred = prediction
            pol_score = []
            for i in range(len(ypred)):
                pol_score.append((ypred[i, 1] - ypred[i, 0]) * finScore)

            df['Polarity'] = pol_score

            return df

        except Exception:
            df['Polarity'] = [0 for i in range(len(df))]
            return df
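The pad-or-truncate step that forces the document-term matrix to the model's expected width, in isolation (657 is the width assumed above):

import numpy as np
from scipy.sparse import csr_matrix

freq_Vec = csr_matrix(np.ones((2, 10)))          # toy 2 x 10 feature matrix
if freq_Vec.shape[1] > 657:
    freq_Vec = freq_Vec.tocsr()[:, 0:657]        # truncate extra columns
else:
    dense = np.array(freq_Vec.todense())
    pad = np.zeros((freq_Vec.shape[0], 657 - freq_Vec.shape[1]))
    freq_Vec = csr_matrix(np.concatenate((dense, pad), axis=1))
print(freq_Vec.shape)                            # (2, 657)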
Example #13
import pandas as pd
from pandas_datareader import data

# Set the start and end date
start_date = '1990-01-01'
end_date = '2019-04-27'
# Set the ticker
ticker = 'AMZN'
# Get the data (note: this rebinds the name `data` from module to DataFrame)
data = data.get_data_yahoo(ticker, start_date, end_date)
data.head()
data.shape

# drop volume, keeping only the price columns
df = data.drop(['Volume'], axis=1)

import matplotlib.pyplot as plt
data['Adj Close'].plot()
df.plot()
plt.savefig('pandas_datareader_demo1.png')

import yfinance as yf
data = yf.download("SPY AAPL", start="2017-01-01", end="2017-04-30")
data.shape
data.head(10)

stock_code = "GOLD.AX"
Example #14
def removeCheapEquity(data, threshold=5):
    # latest adjusted close per ticker
    last_close = data['adj_close'].iloc[-1]
    # tickers trading below the threshold
    cheap = last_close.index[last_close < threshold]
    # drop every column belonging to those tickers
    return data.drop(cheap, level=1, axis=1)
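A toy frame with (field, ticker) MultiIndex columns to exercise the helper (the tickers are hypothetical):

import pandas as pd

cols = pd.MultiIndex.from_product([['adj_close', 'volume'], ['AAA', 'BBB']])
df = pd.DataFrame([[3.0, 10.0, 100, 200],
                   [4.0, 12.0, 110, 210]], columns=cols)
print(removeCheapEquity(df))   # AAA's last adj_close is 4.0 < 5, so AAA is dropped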
Example #15
import numpy as np
from pandas import DataFrame, Series

frame = DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'],
                  columns=['Ohio', 'Texas', 'California'])
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)
# frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill', columns=states)
frame.reindex(index=['a', 'b', 'c', 'd'], columns=states)
# frame.ix[['a', 'b', 'c', 'd'], states]  # .ix was removed from pandas; the reindex above is the modern equivalent

obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c')
obj.drop(['d', 'c'])

data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
data.drop(['Colorado', 'Ohio'])
data.drop('two', axis=1)
data.drop(['two', 'four'], axis=1)

obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj['b']
obj.iloc[1]            # positional indexing
obj.iloc[2:4]
obj[['b', 'a', 'd']]
obj.iloc[[1, 3]]
obj[obj < 2]
obj['b':'c']           # label slicing includes the endpoint
obj['b':'c'] = 5
obj

data = DataFrame(np.arange(16).reshape((4, 4)),