def enter_fin_data(self, symbol, date1='-', date2='-'):
    """Run the dockerised scraper for *symbol* and return its output.

    Creates a throwaway ``data4`` docker volume, runs the ``scrape``
    image with SYMBOL/DATE1/DATE2 passed as environment variables, then
    loads the ``finalkfr1`` artefact the container wrote to the volume.
    The volume is pruned afterwards and the CWD is reset.

    NOTE(review): ``symbol``/``date1``/``date2`` are interpolated into a
    shell command string — assumes trusted input; confirm before exposing
    this to user-supplied values.
    """
    os.system('docker volume create data4')
    run_cmd = (
        'docker run -e SYMBOL={} -e DATE1={} -e DATE2={} -v data4:/usr/share/app scrape'
        .format(symbol, date1, date2)
    )
    os.system(run_cmd)
    # Docker stores named-volume contents under this host path.
    volume_path = '/var/lib/docker/volumes/data4/_data'
    scraped = ext().give_file(volume_path, 'finalkfr1')
    os.system('docker volume prune -f')
    os.chdir('/root/dock1/dock')
    return scraped
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from trainauto2 import neural
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout
from keras.metrics import binary_accuracy
import pdb

# Root directory holding all cached fundamentals artefacts.
# NOTE(review): `ext`, `np` and `pd` are imported elsewhere in this file.
base_dir = '/home/sahil/projdir/fundamentals8'
et = ext()
simcolpairs = np.load(base_dir + '/simcolpairs.npy')
# NIFTY 500 constituent list (symbol metadata).
knift = pd.read_csv(base_dir + '/ind_nifty500list.csv')
# Cached scrape outputs loaded via the project's `ext` helper.
k1 = et.give_file(base_dir, 'monthly_data')
k = et.give_file(base_dir, 'finalkfr1')
kk = []
kk1 = []  # NOTE(review): immediately overwritten by the read_pickle below
ss = pd.read_csv('/home/sahil/projdir/fundamentals8/11ylatest.csv')
symbols_nse = ss.Symbol.unique()
# First element of each item's `title` — presumably the company name; verify.
allsym = [j.title[0] for j in k1]
kk1 = pd.read_pickle(base_dir + '/financials_yearly.pkl')


def prepro(kp):
    """Annotate a price frame with its trailing 12-period return.

    Mutates *kp* in place, adding a 'yearly' column computed from the
    'Close' column shifted by 12 rows (assumes monthly rows — TODO confirm).
    NOTE(review): the function body may continue beyond this chunk.
    """
    print(kp.columns)
    kp['yearly'] = (kp.Close - kp.Close.shift(12)) / kp.Close.shift(12)
def fin_data(i, driver):
    """Scrape the yearly-ratios tables for company *i* via selenium.

    Searches for *i* in the site's company box, navigates to the
    'Ratios' page, then pages through every ratios table, tagging each
    with the company title and BSE/NSE symbols, and stores the merged
    frame as 'finalkfr1'.  Returns the merged DataFrame, or None if any
    step before the table loop fails.

    NOTE(review): `driver_act`, `bsoap`, `ext_symb` and `ext` are
    project helpers defined elsewhere in the file.
    """
    try:
        print('in here')
        print(driver.current_url)
        # Type the company name into the search box and submit.
        driver.find_element_by_xpath('//*[@id="company"]').send_keys(i)
        bt = driver.find_element_by_css_selector(
            'div.MT2:nth-child(1) > input:nth-child(2)')
        bt.click()
        print(driver.current_url)
        # Retry the post-search interaction up to 3 times, 10s apart.
        ct = 0
        while (1):
            if (ct >= 3):
                break
            try:
                driver_act(driver)
                break
            except Exception as e:
                print(e)
                ct = ct + 1
                time.sleep(10)
        pg1 = bsoap(driver.page_source, 'html.parser')
        # Company display name from the page header.
        title = pg1.find('h1', class_='pcstname').text
        rat = pg1.find('a', title='Ratios')
        driver.get(rat['href'])
        bse, nse = ext_symb(pg1)
        # flag: set to 1 when the last page has been reached.
        # flag1: cleared to 0 if the table loop aborted on an exception.
        flag = 0
        flag1 = 1
        k = []
        while (flag == 0):
            try:
                print('on')
                url1 = driver.current_url
                print(url1)
                # First HTML table on the page is the ratios table.
                k1 = pd.read_html(url1, header=0)[0]
                k.append(k1)
                print(len(k1))
                k1['title'] = title
                k1['NSE'] = ''
                k1['BSE'] = ''
                if len(bse) != 0:
                    k1['BSE'] = bse[0]
                if len(nse) != 0:
                    k1['NSE'] = nse[0]
                    print(nse[0])
                pg = bsoap(driver.page_source, 'html.parser')
                # 'next' pagination link; clicked via JS to dodge overlays.
                btx = driver.find_element_by_xpath(
                    '//*[@id="mc_content"]/div[2]/div/div[2]/ul/li[2]/a')
                driver.execute_script("arguments[0].click();", btx)
                # URL unchanged after the click => no further pages.
                if (driver.current_url.encode('ascii') == url1.encode('ascii')
                    ):
                    print('reach')
                    flag = 1
            except Exception as e:
                print(e)
                flag1 = 0
                flag = 1
        if (flag1 == 1):
            # Side-by-side concat of all pages, then drop duplicate columns.
            k = pd.concat(k, axis=1)
            df = k
            df = df.loc[:, ~df.columns.duplicated()]
            ext().store_file('/usr/share/app', 'finalkfr1', df)
    except Exception as e:
        print(e)
        return
    print(i)
    # NOTE(review): local assignment only — never read; looks vestigial.
    last_symb = nse[0]
    print(nse[0])
    return k
import os
from os import path
import shutil
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import pandas as pd
import numpy as np
import pickle as pck
from loading_data import ext
from testscrape import scrape
import sys

# Project data directory and cached progress/result artefacts.
dir1 = '/home/sahil/projdir/fundamentals8'
done = []
p = ext().give_file(dir1, 'finalkfr1')
done = ext().give_file(dir1, 'done_kfr')


def ff():
    """Create and return a headless Firefox webdriver."""
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.headless = True
    driver = webdriver.Firefox(options=options,
                               executable_path='/home/sahil/Downloads/geckodriver')
    # NOTE(review): these capabilities are built AFTER the driver is
    # created and never passed to it, so they have no effect — to apply
    # them they must be given to webdriver.Firefox(...). Left unchanged.
    caps = DesiredCapabilities().FIREFOX
    caps["pageLoadStrategy"] = "normal"
    return driver


def cc():
    """Create a headless Chrome webdriver with container-safe flags."""
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # FIX: the original argument was '--no-sandbox’_' (mis-encoded
    # smart-quote + stray underscore), so Chrome received a garbage flag
    # and the sandbox was never actually disabled.
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome('chromedriver', options=options)
    # TODO(review): no `return driver` here, unlike ff() — the function
    # may be truncated in this chunk; confirm against the full file
    # before adding one.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from datetime import date
from nsepy import get_history
from selenium.webdriver.support.ui import Select
from testscrape import scrape
from loading_data import ext
import pandas as pd
import numpy as np
import pickle as pck
import os
from os import path
import pdb

symlist = []
# NOTE(review): `dir` shadows the builtin of the same name.
dir = '/home/sahil/projdir/fundamentals8'
lst = ext().give_file(dir, 'monthly_data')
# Symbols already processed (first element of each item's `title`).
done = [i.title[0] for i in lst]
# Company names from 'finalkfr1', trimmed at the 'Ltd.' suffix.
# NOTE(review): find() returns -1 when 'Ltd.' is absent, which silently
# drops the last character of the name — confirm this is intended.
symlist = [i.title[0][0:i.title[0].find('Ltd.')]
           for i in ext().give_file(dir, 'finalkfr1')]
# Keep only symbols not already done.
sym = []
for i in symlist:
    if i in done:
        continue
    else:
        sym.append(i)
r = 0
symlist = sym


def mthdata(s):
    # NOTE(review): body is truncated at this chunk boundary — only the
    # counter/flag initialisation is visible here.
    r = 0
    c = 0
    flag = 0