Code example #1
File: rh_psql.py Project: eghensley/rhood_algo
def ind_perfs():    
    PSQL = db_connection('psql')  
    current, next_id = det_cur_perf(PSQL)
      
    # The Alpha Vantage API key is (confusingly) stored in a variable named openfigi_key
    url = 'https://www.alphavantage.co/query?function=SECTOR&apikey=%s' % openfigi_key
    req = requests.request('GET', url)
    if req.status_code == 404:
        return
    data = req.json()
    day_perf = data['Rank B: 1 Day Performance']
    # 'Last Refreshed' carries an AM/PM marker, so the 12-hour %I is needed
    # for %p to take effect (%H silently ignores it)
    date = datetime.strptime(data['Meta Data']['Last Refreshed'].replace(' ET', ''), '%I:%M %p %m/%d/%Y')
    if np.datetime64(date) != current:  
        communication = float(day_perf['Communication Services'].replace('%', ''))
        discretionary = float(day_perf['Consumer Discretionary'].replace('%', ''))
        staples = float(day_perf['Consumer Staples'].replace('%', ''))
        energy = float(day_perf['Energy'].replace('%', ''))
        financial = float(day_perf['Financials'].replace('%', ''))
        health = float(day_perf['Health Care'].replace('%', ''))
        industrial = float(day_perf['Industrials'].replace('%', ''))
        it = float(day_perf['Information Technology'].replace('%', ''))
        material = float(day_perf['Materials'].replace('%', ''))
        realestate = float(day_perf['Real Estate'].replace('%', ''))
        utilities = float(day_perf['Utilities'].replace('%', ''))
        
        script = "INSERT INTO portfolio.ind_perf(\
        	ind_perf_id, date, communication, discretionary, staples, energy, financial, health, industrial, it, material, realestate, utilities)\
        	VALUES (%i, '%s', %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f);" % (next_id, date, communication, discretionary, staples, energy, financial, health, industrial, it, material, realestate, utilities)
        pg_insert(PSQL.client, script)
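The INSERT above builds SQL by string interpolation, which breaks as soon as a value contains a quote. A minimal parameterized sketch, assuming PSQL.client is a psycopg2-style DB-API connection (an assumption; pg_insert's internals are not shown in these examples):

# Sketch only: assumes PSQL.client is a DB-API (e.g. psycopg2) connection
def pg_insert_params(conn, script, params):
    # Placeholders let the driver quote and escape every value
    with conn.cursor() as cur:
        cur.execute(script, params)
    conn.commit()

# Same insert with placeholders (sector columns truncated for brevity)
script = ("INSERT INTO portfolio.ind_perf (ind_perf_id, date, communication) "
          "VALUES (%s, %s, %s);")
pg_insert_params(PSQL.client, script, (next_id, date, communication))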
Code example #2
def store_meta_res(domain):
    #    domain = 'length'
    X, Y = pull_val_data(domain)

    pred_df = pd.DataFrame(Y)
    #    res_df = pd.DataFrame()
    final_model_folder = os.path.join(cur_path, 'model_tuning', 'modelling',
                                      domain, 'final', 'models')
    for mod_name in os.listdir(final_model_folder):
        if mod_name == '.DS_Store':
            continue
        model_path = os.listdir(os.path.join(final_model_folder, mod_name))
        model = load(os.path.join(final_model_folder, mod_name, model_path[0]))
        feats_folder = os.path.join(cur_path, 'model_tuning', 'modelling',
                                    domain, 'final', 'features')
        with open(os.path.join(feats_folder, '%s.json' % (mod_name)),
                  'r') as fp:
            feats = json.load(fp)
            feats = feats[max(feats.keys())]
        scale_folder = os.path.join(cur_path, 'model_tuning', 'modelling',
                                    domain, 'final', 'scalers', mod_name)
        scale_path = os.path.join(scale_folder,
                                  os.listdir(os.path.join(scale_folder))[0])
        scale = load(scale_path)
        mod_preds = cross_validate(X[feats], Y, model, scale)
        mod_preds.rename(columns={0: mod_name}, inplace=True)
        pred_df = pred_df.join(mod_preds)

    pred_cols = [i for i in list(pred_df) if i != domain]

    mod_scores = {}
    for idx in pred_df.index:
        mod_scores[idx] = {}
        row = pred_df.loc[idx]
        for mod in pred_cols:
            # 'winner' and 'length' are scored identically: absolute error
            # of the model's prediction against the target column
            mod_scores[idx][mod] = abs(row[domain] - row[mod])
    mod_scores = pd.DataFrame.from_dict(mod_scores).T

    meta_data = mod_scores.join(X)
    PSQL = db_connection('psql')
    bouts = pg_query(
        PSQL.client,
        "select b.bout_id, weight_desc from ufc.bouts b join ufc.bout_results br on br.bout_id = b.bout_id join ufc.fights f on f.fight_id = b.fight_id join ufc.weights w on b.weight_id = w.weight_id"
    )
    # The second selected column is weight_desc; label it accordingly
    bouts.columns = ['bout_id', 'weight_desc']
    weights = pd.get_dummies(bouts['weight_desc'])
    weights['index'] = bouts['bout_id']
    weights.drop_duplicates(inplace=True)
    weights.set_index('index', inplace=True)
    meta_data = meta_data.join(weights)
    meta_data.to_csv(
        os.path.join(cur_path, 'data', 'meta', 'meta_%s.csv' % (domain)))
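The scoring loop in store_meta_res can also be expressed as a single vectorized pandas operation; a minimal sketch over the same pred_df, pred_cols, and domain names used above:

# Per-model absolute error against the target column, computed column-wise
mod_scores = pred_df[pred_cols].sub(pred_df[domain], axis=0).abs()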
Code example #3
File: scraper.py Project: eghensley/research_match
def scrape():
    DB = _connections.db_connection('mongo')

    with open(os.path.join(cur_path, 'last_error.json')) as f:
        prev_progress = json.load(f)

    url = prev_progress['last_page']
    #    url = 'https://www.researchgate.net/search/publications?q=marine%2Bscience&page=1'

    browser = _connections.sel_scraper(headless=False)

    while True:

        try:
            browser.get(url)
            sleep(randint(10, 100) / 10)
            tree = html.fromstring(browser.page_source)

            paper_links = tree.xpath(
                '//div[@class="react-container"]/div/div[2]/div[2]/div/div[2]/div/div[1]/div/div/div/div/div/div/div/div/div/div/a/@href'
            )
            paper_links = [i.split('?')[0] for i in paper_links]

            if len(paper_links) == 0:
                DB.disconnect()
                browser.close()
                raise ValueError()

        except Exception:
            with open(os.path.join(cur_path, 'last_error.json'), 'w') as fp:
                json.dump({'last_page': url}, fp)
            DB.disconnect()
            browser.close()
            raise IndexError()

        for p_link in paper_links:
            try:
                # find_one returns None for unseen articles; len(None) raises
                # TypeError, which drives the "not yet archived" branch below
                len(DB.client.find_one({'url_tag': p_link}))
                print('Article already archived, skipping')
            except TypeError:
                if 'book_review' in p_link.lower():
                    continue
                print(p_link)
                try:
                    browser, DB = store_abstracts(p_link, browser, DB)
                except Exception:
                    print(p_link)

                    with open(os.path.join(cur_path, 'last_error.json'),
                              'w') as fp:
                        json.dump({'error_link': p_link, 'last_page': url}, fp)
                    DB.disconnect()
                    browser.close()
                    raise ValueError()
        url = next_page(url)
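next_page is not defined in this excerpt; judging by the commented search URL above (ending in page=1), it plausibly just advances the page query parameter. A hypothetical sketch, not the project's actual helper:

import re

def next_page(url):
    # Hypothetical: bump the numeric 'page=' query parameter by one
    return re.sub(r'page=(\d+)',
                  lambda m: 'page=%i' % (int(m.group(1)) + 1),
                  url)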
Code example #4
File: rh_psql.py Project: eghensley/rhood_algo
def add_stock_ids():
    PSQL = db_connection('psql')
    cur_stocks = _det_current_stocks(PSQL)
    url = 'https://api.robinhood.com/instruments/'
    data_dict = requests.get(url).json()
    cur_stocks = stock_loop(PSQL, data_dict['results'], cur_stocks)
    # Follow pagination; test 'next' before requesting so the final page
    # never triggers requests.get(None)
    while data_dict['next'] is not None:
        url = data_dict['next']
        data_dict = requests.get(url).json()
        cur_stocks = stock_loop(PSQL, data_dict['results'], cur_stocks)
Code example #5
File: update_psql.py Project: eghensley/cryptoalgo
def update():    
    PSQL = db_connection('psql')
    next_update = floor_dt(datetime.now(), timedelta(minutes=30))
    dt_until_update = next_update - datetime.now()
    seconds_before_update = dt_until_update.total_seconds()
    tm.sleep(seconds_before_update)
    BINANCE = binance_connection()
    NLU = NaturalLanguageUnderstandingV1(version=_config.nlu_credentials["version"], username=_config.nlu_credentials["username"],
                                            password=_config.nlu_credentials["password"])
    NLU.set_default_headers({'x-watson-learning-opt-out' : "true"})
    auth = tweepy.OAuthHandler(_config.twitter_key, _config.twitter_secret)
    TWITTER = tweepy.API(auth)
    
    page = requests.get('https://info.binance.com/en/all')
    tree = html.fromstring(page.content)
    COINS = [{'name':i, 'price':j} for i,j in zip(tree.xpath('//*[@id="__next"]/div/main/div/div/div/div[2]/div[1]/table/tbody/tr/td[2]/div/div/span[1]/text()'), tree.xpath('//*[@id="__next"]/div/main/div/div/div/div[2]/div[1]/table/tbody/tr/td[3]/div/div/text()'))]
          
         
    LAST_TIME = floor_dt(datetime.now(), timedelta(minutes=-30))
    PRICES = BINANCE.get_all_tickers()
    
    pop_coins(PSQL, COINS)
    pop_markets(PSQL, PRICES)
    pop_times(PSQL, LAST_TIME)
    
    CURRENT_MKT_DATA, MRKT_CONV, TIME_CONV = conversion_data(PSQL)
    
    pop_exchanges(PSQL, LAST_TIME, PRICES, MRKT_CONV, CURRENT_MKT_DATA, TIME_CONV, BINANCE)

    tweet_data, coin_id_conv, use_coins = insert_twitter(PSQL, TWITTER, NLU)
#    nlu_insert(tweet_data[[0,1]], NLU, PSQL, coin_id_conv, use_coins)
    
#    pop_prices(PSQL, TIME_CONV, COINS)
    
    PSQL.disconnect()
    # Release references so the long-running scheduler can reclaim memory
    PSQL = None
    BINANCE = None
    COINS = None
    LAST_TIME = None
    PRICES = None
    CURRENT_MKT_DATA = None
    MRKT_CONV = None
    TIME_CONV = None
    TWITTER = None
    NLU = None
    tweet_data = None
    coin_id_conv = None
    use_coins = None
Code example #6
File: rh_psql.py Project: eghensley/rhood_algo
def dividends(full_update = False):
    print(' ~~ Dividends ~~ ')

    PSQL = db_connection('psql')
    id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks')          
    current, next_id = det_cur_divs(PSQL)
    total_stocks = len(id_sym)
    for stock_num, (idx, sym) in enumerate(id_sym.values):
        progress(stock_num, total_stocks, status = sym)

        if not full_update and idx in current.keys():
            continue
        url = 'https://api.iextrading.com/1.0/stock/%s/dividends/5y' % (sym)        
        req = requests.request('GET', url)
        if req.status_code == 404:
            continue
        data = req.json()
        if len(data) == 0:
            continue
        # Oldest dividend first, so current[idx] advances chronologically
        for div in reversed(data):
            ex_date = datetime.strptime(div['exDate'], '%Y-%m-%d')
            if idx in current.keys() and ex_date <= current[idx]:
                continue
            if div['amount'] == '':
                continue
            amount = float(div['amount'])
            if div['paymentDate'] == '':
                continue
            payment_date = datetime.strptime(div['paymentDate'], '%Y-%m-%d')
            record_date = datetime.strptime(div['recordDate'], '%Y-%m-%d')
            if div['declaredDate'] == '':
                declared_date = 'null'
            else:
                declared_date = "'"+str(datetime.strptime(div['declaredDate'], '%Y-%m-%d'))+"'"
            
            script = "INSERT INTO portfolio.dividends(\
                    div_id, rh_id, ex_date, payment_date, record_date, declared_date, amount) \
                    VALUES (%i, '%s', '%s', '%s', '%s', %s, %.2f);" % (next_id, idx, ex_date, payment_date, record_date, declared_date, amount)
            pg_insert(PSQL.client, script)
            next_id += 1
            current[idx] = ex_date
            ex_date = None
            payment_date = None
            record_date = None
            declared_date = None
            amount = None
Code example #7
File: rh_psql.py Project: eghensley/rhood_algo
def day_prices():
    
    print(' ~~ Full Day Prices ~~ ')
    PSQL = db_connection('psql')
    current, next_id = det_cur_day_prices(PSQL, 'day')
    id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks')
    total_stocks = len(id_sym)
    trader = rs()
    login_data = trader.login()
        
    for stock_num, (idx, sym) in enumerate(id_sym.values):
        progress(stock_num, total_stocks, status = sym)
        symbols = helper.inputs_to_set(sym)
        url = urls.historicals()
        payload = { 'symbols' : ','.join(symbols),
                    'interval' : 'day',
                    'span' : '5year',
                    'bounds' : 'regular'}
        data = trader.request_get(url,'results',payload)
        
        if data == [None]:
            continue
        for day in data[0]['historicals']:
            beg_date = datetime.strptime(day['begins_at'].replace('Z', '').replace('T', ' '), '%Y-%m-%d %H:%M:%S')
            if idx in current.keys() and beg_date <= current[idx]:
                continue
    
            open_price = float(day['open_price'])
            close_price = float(day['close_price'])
            high_price = float(day['high_price'])
            low_price = float(day['low_price'])
            volume = int(day['volume'])
            script = "INSERT INTO portfolio.day_prices(day_price_id, rh_id, date, open_price, close_price, high_price, low_price, volume) VALUES ('%s', '%s', '%s', %.2f, %.2f, %.2f, %.2f, %i);" % (next_id, idx, beg_date, open_price, close_price, high_price, low_price, volume)
            pg_insert(PSQL.client, script)
            next_id += 1  
            open_price = None
            close_price = None
            high_price = None
            low_price = None
            volume = None
    trader.logout()
Code example #8
File: update_psql.py Project: eghensley/cryptoalgo
def find_arb():
    next_update = floor_dt(datetime.now(), timedelta(minutes=30))
    dt_until_update = next_update - datetime.now()
    seconds_before_update = dt_until_update.total_seconds()
    PSQL_2 = db_connection('psql')
    BINANCE_2 = binance_connection()
    potentials = filter_potentials(PSQL_2)
    while seconds_before_update > 0:
        for coin2, coin1, _ in potentials: 
            
            prices, available_eth = cap_market(coin1, coin2, BINANCE_2)
            forward_tri = sim_tri(prices, [coin2, coin1], float(available_eth['free']))
            if forward_tri:
                print('ETH -> %s -> %s -> ETH arbitrage: %.3f%%' % (coin2, coin1, 100 * (forward_tri[-1][-1] - forward_tri[0][-1])))
                if forward_tri[-1][-1] > forward_tri[0][-1]:
                    print(forward_tri)
                    raise Exception('Found Profit!!!')
            backward_tri = sim_tri(prices, [coin1, coin2], float(available_eth['free']))
            if backward_tri:
                print('ETH -> %s -> %s -> ETH arbitrage: %.3f%%' % (coin1, coin2, 100 * (backward_tri[-1][-1] - backward_tri[0][-1])))
                if backward_tri[-1][-1] > backward_tri[0][-1]:
                    print(backward_tri)
                    raise Exception('Found Profit!!!')
#        tm.sleep(15)
        dt_until_update = next_update - datetime.now()
        seconds_before_update = dt_until_update.total_seconds()     
        
    BINANCE_2 = None
    PSQL_2.disconnect()
    PSQL_2 = None
    prices = None
    coin1 = None
    coin2 = None
    forward_tri = None
    backward_tri = None
    next_update = None
    dt_until_update = None
    seconds_before_update = None
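For context on the check in find_arb: a triangular cycle is profitable when the product of the three exchange rates, net of fees, exceeds 1. A self-contained numeric sketch, independent of the sim_tri/cap_market helpers above (the 0.1% fee is illustrative, not the project's configured fee):

def tri_profit(eth_to_a, a_to_b, b_to_eth, start_eth=1.0, fee=0.001):
    # Pass start_eth around the ETH -> A -> B -> ETH cycle,
    # paying the taker fee on each leg
    amt = start_eth
    for rate in (eth_to_a, a_to_b, b_to_eth):
        amt = amt * rate * (1 - fee)
    return amt - start_eth

# Rates whose product is 1.005 leave roughly 0.2% after three 0.1% fees
print(tri_profit(400.0, 0.005025, 0.5))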
Code example #9
import os

from joblib import load
import pandas as pd
from _connections import db_connection
from db.pop_psql import pg_query
from progress_bar import progress


def conv_to_ml_odds(prob):
    # Convert a win probability to the magnitude of American moneyline odds;
    # the caller applies the favorite (-) / underdog (+) sign
    if prob > .5:
        odds = (prob / (1 - prob)) * 100
    else:
        odds = ((1 - prob) / prob) * 100
    return odds
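# For example:
#     conv_to_ml_odds(0.6)  -> 150.0 (a -150 favorite)
#     conv_to_ml_odds(0.25) -> 300.0 (a +300 underdog)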
    
    
PSQL = db_connection('psql')

alpha_data = pd.read_csv(os.path.join(cur_path, 'data', 'pred_data_winner_est_training.csv'))
alpha_data = alpha_data.sort_values('bout_id').set_index(['bout_id', 'fighter_id'])
X0 = alpha_data[[i for i in list(alpha_data) if i != 'winner']]
Y0 = alpha_data['winner'].apply(lambda x: x if x == 1 else 0)


beta_data = pd.read_csv(os.path.join(cur_path, 'data', 'pred_data_winner_ens_training.csv'))
beta_data = beta_data.sort_values('bout_id').set_index(['bout_id', 'fighter_id'])
X2 = beta_data[[i for i in list(beta_data) if i != 'winner']]
Y2 = beta_data['winner'].apply(lambda x: x if x == 1 else 0)


domain = 'winner'
final_model_folder = os.path.join(cur_path, 'model_tuning', 'modelling', domain, 'final', 'models')
Code example #10
File: triangular.py Project: eghensley/cryptoalgo
import os
import sys

# Walk up the directory tree until the project root ('binance') is reached
while cur_path.split('/')[-1] != 'binance':
    cur_path = os.path.abspath(os.path.join(cur_path, os.pardir))
sys.path.insert(1, os.path.join(cur_path, 'lib', 'python3.7', 'site-packages'))


import requests
from lxml import html
from itertools import permutations
from binance.enums import *
from _connections import binance_connection, db_connection
from datetime import datetime, timedelta
import cryptocompare


psql = db_connection('psql')

page = requests.get('https://info.binance.com/en/all')
tree = html.fromstring(page.content)
coins = [{'name':i, 'price':j} for i,j in zip(tree.xpath('//*[@id="__next"]/div/main/div/div/div/div[2]/div[1]/table/tbody/tr/td[2]/div/div/span[1]/text()')[:50], tree.xpath('//*[@id="__next"]/div/main/div/div/div/div[2]/div[1]/table/tbody/tr/td[3]/div/div/text()')[:50])]

client = binance_connection()
prices = client.get_all_tickers()

stable_coin = 'PAX'
stable_len = len(stable_coin)
# Markets quoted in the stable coin (e.g. 'BTCPAX'): match the suffix, strip it for the key
stable_sells = {i['symbol'][:-stable_len]: float(i['price']) for i in prices if i['symbol'][-stable_len:] == stable_coin}
# Markets with the stable coin as base (e.g. 'PAXBTC'): match the prefix, strip it for the key
stable_buys = {i['symbol'][stable_len:]: float(i['price']) for i in prices if i['symbol'][:stable_len] == stable_coin}

def floor_dt(dt, delta):
    # Rounds dt UP to the next multiple of delta (a negative delta rounds down)
    return dt + (datetime.min - dt) % delta
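Despite the name, floor_dt with a positive delta rounds up to the next boundary (the modulo term is the time remaining until it), while a negative delta rounds down, which is how update() and find_arb() above use it:

# Uses the datetime/timedelta imports above
print(floor_dt(datetime(2020, 1, 1, 10, 17), timedelta(minutes=30)))
# -> 2020-01-01 10:30:00
print(floor_dt(datetime(2020, 1, 1, 10, 17), timedelta(minutes=-30)))
# -> 2020-01-01 10:00:00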
Code example #11
File: rh_psql.py Project: eghensley/rhood_algo
def financials():
    PSQL = db_connection('psql')
    current, next_id = det_cur_fin(PSQL)
    id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks') 
    total_stocks = len(id_sym)
    for stock_num, (idx, sym) in enumerate(id_sym.values):
        progress(stock_num, total_stocks, status = sym)
        
        url = 'https://api.iextrading.com/1.0/stock/%s/financials' % (sym)        
        req = requests.request('GET', url)
        if req.status_code == 404:
            continue
        data = req.json()
        if len(data) == 0:
            continue
        
        def int_or_null(val):
            # IEX reports missing figures as None; map those to the SQL
            # literal 'null', otherwise cast to int
            return 'null' if val is None else int(val)

        for stock in reversed(data['financials']):

            report_date = datetime.strptime(stock['reportDate'], '%Y-%m-%d')

            if idx in current.keys() and current[idx] >= np.datetime64(report_date):
                continue

            gross_profit = int_or_null(stock['grossProfit'])
            cost_revenue = int_or_null(stock['costOfRevenue'])
            operating_revenue = int_or_null(stock['operatingRevenue'])
            total_revenue = int_or_null(stock['totalRevenue'])
            operating_income = int_or_null(stock['operatingIncome'])
            net_income = int_or_null(stock['netIncome'])
            r_d = int_or_null(stock['researchAndDevelopment'])
            operating_expense = int_or_null(stock['operatingExpense'])
            current_assets = int_or_null(stock['currentAssets'])
            total_assets = int_or_null(stock['totalAssets'])
            total_liabilities = int_or_null(stock['totalLiabilities'])
            current_cash = int_or_null(stock['currentCash'])
            current_debt = int_or_null(stock['currentDebt'])
            total_cash = int_or_null(stock['totalCash'])
            total_debt = int_or_null(stock['totalDebt'])
            shareholder_equity = int_or_null(stock['shareholderEquity'])
            cash_change = int_or_null(stock['cashChange'])
            cash_flow = int_or_null(stock['cashFlow'])
            operating_gl = int_or_null(stock['operatingGainsLosses'])
                
    
            script = "INSERT INTO portfolio.financials(\
                	financials_id, rh_id, report_date, gross_profit, cost_revenue, operating_revenue, total_revenue, operating_income, net_income, r_d, operating_expense, current_assets, total_assets, total_liabilities, current_cash, current_debt, total_cash, total_debt, shareholder_equity, cash_change, cash_flow, operating_gl)\
                	VALUES (%i, '%s', '%s', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);" % (next_id, idx, report_date, gross_profit, cost_revenue, operating_revenue, total_revenue, operating_income, net_income, r_d, operating_expense, current_assets, total_assets, total_liabilities, current_cash, current_debt, total_cash, total_debt, shareholder_equity, cash_change, cash_flow, operating_gl)
            pg_insert(PSQL.client, script)
            next_id += 1
            current[idx] = np.datetime64(report_date)
            
            # Release per-report values between iterations
            report_date = None
            gross_profit = None
            cost_revenue = None
            operating_revenue = None
            total_revenue = None
            operating_income = None
            net_income = None
            r_d = None
            operating_expense = None
            current_assets = None
            total_assets = None
            total_liabilities = None
            current_cash = None
            current_debt = None
            total_cash = None
            total_debt = None
            shareholder_equity = None
            cash_change = None
            cash_flow = None
            operating_gl = None
Code example #12
File: pop_psql.py Project: eghensley/research_match
                    inst = auth['institution']
                    if 'page' not in inst.keys():
                        continue
                    inst['page'] = inst['page'].replace(
                        'https://www.researchgate.net/', '').lower()
                    if inst['page'] not in cur_insts.keys():
                        nxt_inst, cur_insts = pop_inst(psql, nxt_inst, nxt_pap,
                                                       cur_insts, inst)
                    nxt_pap_inst = pop_pap_inst(psql, cur_insts[inst['page']],
                                                nxt_pap, nxt_pap_inst)


def update_refs(mongo, psql):
    nxt_pap, cur_paps, psql = _det_current_(psql, 'papers')
    pg_create_table(psql.client, 'references')
    nxt_ref = 0
    for entry in mongo.client.find({'abstract': {'$exists': True}}):
        if 'references' in entry.keys():
            entry['url_tag'] = entry['url_tag'].lower()
            for ref in entry['references']:
                if ref.lower() in cur_paps.keys():
                    script = "insert into research_match.references (ref_id, cit_pap_id, ref_pap_id) VALUES (%i, %i, %i)" % (
                        nxt_ref, cur_paps[entry['url_tag']],
                        cur_paps[ref.lower()])
                    pg_insert(psql.client, script)
                    nxt_ref += 1


MONGO = _connections.db_connection('mongo')
PSQL = _connections.db_connection('psql')
Code example #13
File: scraper.py Project: eghensley/research_match
def add_refs():
    DB = _connections.db_connection('mongo')
    browser = _connections.sel_scraper(headless=False)

    #    total = DB.client.find({'references': {'$exists': False}, 'abstract': {'$exists': True}}).count()
    total = DB.client.find({
        'references': [],
        'abstract': {
            '$exists': True
        }
    }).count()
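    # Note: Cursor.count() was deprecated in PyMongo 3.x and removed in 4.0.
    # If DB.client is a PyMongo Collection (an assumption in this excerpt),
    # the modern equivalent would be:
    #     total = DB.client.count_documents(
    #         {'references': [], 'abstract': {'$exists': True}})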

    print('%i Documents need updating' % (total))
    #    for doc_num, (doc) in enumerate(DB.client.find({'references': {'$exists': False}, 'abstract': {'$exists': True}})):
    for doc_num, (doc) in enumerate(
            DB.client.find({
                'references': [],
                'abstract': {
                    '$exists': True
                }
            })):
        url = 'https://www.researchgate.net/' + doc['url_tag']
        sleep(randint(10, 100) / 25)

        browser.get(url)
        article_tree = html.fromstring(browser.page_source)
        if len(article_tree.xpath('//div[@class="temporarily-blocked"]')) > 0:
            DB.disconnect()
            browser.close()
            DB = None
            browser = None
            raise ValueError('Login Error')


#            login(browser, url)

        if len(
                article_tree.xpath(
                    '//div[@class="captcha-container js-widgetContainer"]')
        ) > 0:
            DB.disconnect()
            browser.close()
            DB = None
            browser = None
            print('Captcha Activated')
            #            return(False)
            raise ValueError('Captcha Activated')

        article_tree = html.fromstring(browser.page_source)
        if True:  # always-true guard; the reference-tab handling below always runs
            sleep(1)
            if len(
                    browser.find_elements_by_xpath(
                        '//div[@class="nova-c-nav__wrapper"]/div[@class="nova-c-nav__items"]/button'
                    )) == 0:
                # No References tab rendered for this article; skip it
                continue
            article_tree = html.fromstring(browser.page_source)

            ref_tabs = article_tree.xpath(
                '//div[@class="nova-c-nav__wrapper"]/div[@class="nova-c-nav__items"]/button/span/div/text()'
            )
            ref_tab = [i for i in ref_tabs if 'references' in i.lower()][0]

            sel_refs = True
            if len(
                    article_tree.xpath(
                        '//button[@class="nova-c-nav__item is-selected references js-lite-click" and ./span/div="%s"]'
                        % (ref_tab))) > 0:
                sel_refs = False
            if len(
                    article_tree.xpath(
                        '//button[@class="nova-c-nav__item references js-lite-click is-selected" and ./span/div="%s"]'
                        % (ref_tab))) > 0:
                sel_refs = False
            ref_button = browser.find_elements_by_xpath(
                '//button[./span/div="%s"]' % (ref_tab))
            browser.execute_script("window.scrollTo(0, %i);" %
                                   (ref_button[0].location['y'] - 100))

            if sel_refs:
                article_tree = html.fromstring(browser.page_source)
                if len(
                        article_tree.xpath(
                            '//button[@class="nova-c-nav__item is-selected references js-lite-click" and ./span/div="%s"]'
                            % (ref_tab))) > 0:
                    sel_refs = False
                elif len(
                        article_tree.xpath(
                            '//button[@class="nova-c-nav__item references js-lite-click is-selected" and ./span/div="%s"]'
                            % (ref_tab))) > 0:
                    sel_refs = False
                else:
                    ref_button = browser.find_elements_by_xpath(
                        '//button[./span/div="%s"]' % (ref_tab))
                    browser.execute_script(
                        "window.scrollTo(0, %i);" %
                        (ref_button[-1].location['y'] - 100))
                    ref_button[-1].click()
                    sel_refs = False

            article_tree = html.fromstring(browser.page_source)
            init_see_more = len(
                article_tree.xpath('//button[./span="Show more"]'))
            see_more = len(article_tree.xpath('//button[./span="Show more"]'))
            num_fails = 0
            while see_more == init_see_more:
                if see_more == 0:
                    break
                sleep(1)
                load_fail = _load_more(browser)
                if load_fail:
                    num_fails += 1
                if num_fails > 20:
                    DB.disconnect()
                    browser.close()
                    DB = None
                    browser = None
                    print('Ref Error')
                    #            return(False)
                    raise IndexError('Ref Error')

                article_tree = html.fromstring(browser.page_source)
                see_more = len(
                    article_tree.xpath('//button[./span="Show more"]'))

        article_tree = html.fromstring(browser.page_source)

        references = article_tree.xpath(
            '//div[@class="nova-v-publication-item__stack-item"]/div/a/@href')
        references = [i.split('?_sg')[0] for i in references]

        #        references = article_tree.xpath('//div[@itemprop="citation"]/div/div/div/div/a/@href')
        DB.client.update_one({'_id': doc['_id']},
                             {'$set': {
                                 'references': references
                             }})
        progress(doc_num + 1, total, status='%i' % (doc_num + 1))