def get_paths():
    """
    PURPOSE: Determine the path names of critical directories.
    Create these paths if they do not already exist.

    Environment variables read:
        BSF_LENGTH  'long' selects the data/input-long and data/output-long
                    directories; any other value (or unset) selects the
                    '-short' variants.
        BSF_ENV     'production' selects the fixed production Rails public
                    directory; any other value (or unset) selects
                    $HOME/bsf/public.
        HOME        Only read when BSF_ENV is not 'production'.

    Environment variables written:
        BSF_ROOT, BSF_INPUT, BSF_OUTPUT, BSF_DETAILED, BSF_RAILS_PUBLIC

    Returns:
        None. Results are published through os.environ.

    Raises:
        KeyError: if HOME is unset while BSF_ENV is not 'production'.
    """
    import os

    # Project root: the parent of the directory that contains this script.
    dir_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    # The detailed-data directory is shared by the short and long runs.
    dir_detailed = dir_root + '/data/detailed'

    # Default to the short data set; a missing BSF_LENGTH means "short"
    # instead of raising KeyError.
    if os.environ.get('BSF_LENGTH') == 'long':
        dir_input = dir_root + '/data/input-long'
        dir_output = dir_root + '/data/output-long'
    else:
        dir_input = dir_root + '/data/input-short'
        dir_output = dir_root + '/data/output-short'

    # Path of the 'public' directory in the Rails app. HOME is only needed
    # outside production, so it is only looked up there.
    if os.environ.get('BSF_ENV') == 'production':
        dir_rails_public = '/home/doppler/webapps/bsf/current/public'
    else:
        dir_rails_public = os.environ['HOME'] + '/bsf/public'

    # Exported before modules.common is imported, matching the original
    # statement order.
    # NOTE(review): unclear whether anything relies on this ordering.
    os.environ['BSF_RAILS_PUBLIC'] = dir_rails_public

    # Print the names of the directories.
    print("Input data directory: " + dir_input)
    print("Output data directory: " + dir_output)
    print("Directory for detailed fund data: " + dir_detailed)
    print("Directory containing 'public' files in Rails: " + dir_rails_public)

    # Create the directories if they do not already exist.
    from modules import common
    common.create_dir(dir_input)
    common.create_dir(dir_output)
    common.create_dir(dir_detailed)

    # Save the remaining results in environment variables.
    os.environ['BSF_ROOT'] = dir_root
    os.environ['BSF_INPUT'] = dir_input
    os.environ['BSF_OUTPUT'] = dir_output
    os.environ['BSF_DETAILED'] = dir_detailed
def _minutes_remaining(done, total, elapsed_s):
    """Estimate minutes left from the average rate so far, or None if no rate exists yet."""
    if done <= 0 or elapsed_s <= 0:
        # A zero elapsed time (coarse clock) would divide by zero below.
        return None
    rate_s = done / float(elapsed_s)  # Funds per second
    remain_s = (total - done) / rate_s
    return round(remain_s / 60, 1)


def download():
    """
    Download detailed data on funds from Yahoo Finance.

    Steps, in order:
      1. Write the unfiltered list of funds to
         $BSF_OUTPUT/fund_unfiltered.csv (db.print_csv).
      2. Filter the list of funds (db.filter_by_fundtype, db.filter_by_obj,
         db.filter_by_name) and call db.renumber.
      3. For every symbol returned by db.get_symbols, save the profile
         page, the holdings page and the quote CSV under
         $BSF_DETAILED/<symbol>/.

    Progress is printed after the 10th fund and after every 100th fund.

    Returns:
        None.

    Raises:
        KeyError: if BSF_OUTPUT or BSF_DETAILED is not set in the
            environment.
    """
    import os
    import time
    from contextlib import closing

    import psycopg2.extras

    from modules import common
    from modules import db

    dir_output = os.environ['BSF_OUTPUT']
    file_csv_output = dir_output + '/fund_unfiltered.csv'

    # Each database phase uses its own connection; closing() guarantees the
    # connection is released even if a db call raises.
    # NOTE(review): the meaning of the argument 60 to db.connect is not
    # visible here.
    with closing(db.connect(60)) as conn:
        dict_cursor = conn.cursor(
            cursor_factory=psycopg2.extras.RealDictCursor)
        print("Creating unfiltered list of funds at:")
        print(file_csv_output)
        db.print_csv(conn, dict_cursor, file_csv_output)

    print("Filtering the list of funds")
    with closing(db.connect(60)) as conn:
        cursor = conn.cursor()
        db.filter_by_fundtype(conn, cursor)
        db.filter_by_obj(conn, cursor)
        db.filter_by_name(conn, cursor)
        db.renumber(conn, cursor)  # Reset ID numbers, DOES NOT WORK

    print("************************************************")
    print("Downloading the detailed data on all stock funds")
    print("NOTE: This may be a VERY long process.")

    # Get list of symbols
    with closing(db.connect(60)) as conn:
        dict_cursor = conn.cursor(
            cursor_factory=psycopg2.extras.RealDictCursor)
        list_symbols = db.get_symbols(dict_cursor)

    # Loop-invariant, so it is looked up once rather than per symbol.
    dir_detailed = os.environ['BSF_DETAILED']
    i_max = len(list_symbols)  # Total number of funds
    start = time.time()

    # i counts the funds completed so far (1-based).
    for i, symbol in enumerate(list_symbols, 1):
        dir_symbol = dir_detailed + '/' + symbol
        common.create_dir(dir_symbol)

        url1 = 'http://finance.yahoo.com/q/pr?s=' + symbol + '+Profile'
        url2 = 'http://finance.yahoo.com/q/hl?s=' + symbol + '+Holdings'
        url3 = 'http://finance.yahoo.com/d/quotes.csv?s=' + symbol + '&f=l1'
        file1 = dir_symbol + '/profile.html'
        file2 = dir_symbol + '/holdings.html'
        file3 = dir_symbol + '/quote.csv'

        # NOTE(review): the meaning of the two numeric arguments to
        # common.download_file is not visible here.
        common.download_file(url1, file1, 164, .2)
        common.download_file(url2, file2, 164, .2)
        common.download_file(url3, file3, 20, .002)

        # Report only at milestones; the estimate is not needed otherwise.
        if i == 10 or i % 100 == 0:
            print("Download completion: " + str(i) + '/' + str(i_max))
            remain_m = _minutes_remaining(i, i_max, time.time() - start)
            if remain_m is not None:
                print("Minutes remaining: " + str(remain_m))

    print("Finished downloading detailed data on stock funds")
    print("*************************************************")