Esempio n. 1
0
def insert(date):
    start = time.time()
    datafilepath = settings.data_dir + "UnderlyingOptionsEODQuotes_" + str(date) + ".zip"
    unzippedpath = util.unzip(datafilepath)
    
    db = psycopg2.connect("host=localhost dbname=optiondata user="******"DROP TABLE")
    cur.execute("DROP TABLE IF EXISTS fullday") 
    
    print("CREATE TABLE")
    cur.execute("CREATE TABLE fullday(id SERIAL PRIMARY KEY NOT NULL, underlying_symbol VARCHAR, quote_date date, root VARCHAR, expiration date, strike decimal, option_type VARCHAR, open decimal, high decimal, low decimal, close decimal, trade_volume int, bid_size_1545 VARCHAR, bid_1545 decimal, ask_size_1545 VARCHAR, ask_1545 decimal, underlying_bid_1545 decimal, underlying_ask_1545 decimal, underlying_mid_1545 decimal, bid_size_eod VARCHAR, bid_eod VARCHAR, ask_size_eod VARCHAR, ask_eod VARCHAR, mid_1545 decimal, underlying_bid_eod VARCHAR, underlying_ask_eod VARCHAR, vwap decimal, open_interest VARCHAR, delivery_code VARCHAR, rf decimal, rtiy decimal, iv decimal, bs_price_bid_ask decimal, delta decimal, theta decimal, vega decimal)")

    print("CREATE INDEX...")
    cur.execute("CREATE INDEX idx_underlying_symbol_fullday ON fullday USING btree (underlying_symbol)")
    cur.execute("CREATE INDEX idx_strike_fullday ON fullday USING btree (strike)")
    cur.execute("CREATE INDEX idx_expiration_fullday ON fullday USING btree (expiration)")
    cur.execute("CREATE INDEX idx_type_fullday ON fullday USING btree (option_type)")
    cur.execute("CREATE INDEX idx_bid_1545_fullday ON fullday USING btree (bid_1545)")
    cur.execute("CREATE INDEX idx_ask_1545_fullday ON fullday USING btree (ask_1545)")
    
    print ("copy_from and commit")
    with open(unzippedpath, 'r') as f:
        next(f)  # Skip the header row.
        cur.copy_from(f, 'fullday', sep=',', columns=('underlying_symbol', 'quote_date', 'root', 'expiration', 'strike', 'option_type', 'open', 'high', 'low', 'close', 'trade_volume', 'bid_size_1545', 'bid_1545', 'ask_size_1545', 'ask_1545', 'underlying_bid_1545', 'underlying_ask_1545', 'bid_size_eod', 'bid_eod', 'ask_size_eod', 'ask_eod', 'underlying_bid_eod', 'underlying_ask_eod', 'vwap', 'open_interest', 'delivery_code'))
        db.commit()

    print("Update: LOWER(option_type)")
    cur.execute("UPDATE fullday SET option_type = LOWER(option_type)")
    db.commit()
    
    print("Update: underlying_mid_1545")
    cur.execute("UPDATE fullday SET underlying_mid_1545 = (underlying_bid_1545 + underlying_ask_1545) / 2")
    db.commit()
    
    print("Update: mid_1545")
    cur.execute("UPDATE fullday SET mid_1545 = (bid_1545 + ask_1545) / 2")
    db.commit()
    
    print()
    print ("Done!")


    if unzippedpath != "": 
        os.remove(unzippedpath)

    end = time.time() 
    print (end - start)
    db.close()
Esempio n. 2
0
def insert(underlyings):

    print("insert: " + str(underlyings))
    print()

    engine = create_engine("postgresql://" + settings.db_username + ":" +
                           settings.db_password + "@127.0.0.1/optiondata")
    table = 'optiondata'

    db = pgdb.connect(host="localhost",
                      user=settings.db_username,
                      password=settings.db_password,
                      database="optiondata")
    cur2 = db.cursor()

    counter = 0

    onlyfiles = [
        f for f in listdir(settings.path_to_data_directory)
        if isfile(join(settings.path_to_data_directory, f))
    ]
    for datafile in onlyfiles:

        if datafile.endswith(".zip"):

            index = datafile.index('_') + 1
            datestring = datafile[index:(index + 10)]
            unzippedpath = ""

            for underlying in underlyings:

                query = "SELECT EXISTS(SELECT 1 FROM optiondata WHERE quote_date = '" + datestring + "' AND underlying_symbol = '" + underlying + "')"
                cur2.execute(query)
                row = cur2.fetchone()

                if (row[0] == 0):

                    counter += 1
                    print(
                        str(counter) + "\t" + datestring + "\t" + underlying +
                        "\t" + str(datafile))

                    if unzippedpath == "":
                        datafilepath = settings.path_to_data_directory + datafile
                        unzippedpath = util.unzip(datafilepath)

                    csv = pd.read_csv(unzippedpath,
                                      header=0,
                                      dtype={
                                          "underlying_symbol": object,
                                          "quote_date": object,
                                          "root": object,
                                          "expiration": object,
                                          "strike": np.float64,
                                          "option_type": object,
                                          "open": np.float64,
                                          "high": np.float64,
                                          "low": np.float64,
                                          "close": np.float64,
                                          "trade_volume": np.int64,
                                          "bid_size_1545": np.int64,
                                          "bid_1545": np.float64,
                                          "ask_size_1545": np.int64,
                                          "ask_1545": np.float64,
                                          "underlying_bid_1545": np.float64,
                                          "underlying_ask_1545": np.float64,
                                          "bid_size_eod": np.int64,
                                          "bid_eod": np.float64,
                                          "ask_size_eod": np.int64,
                                          "ask_eod": np.float64,
                                          "underlying_bid_eod": np.float64,
                                          "underlying_ask_eod": np.float64,
                                          "vwap": object,
                                          "open_interest": np.float64,
                                          "ask_eod": np.float64,
                                          "delivery_code": object
                                      })

                    filtered = csv[(csv['underlying_symbol'] == underlying)]
                    if underlying == "^SPX":
                        filtered = filtered[(filtered.root != 'BSZ') &
                                            (filtered.root != 'BSK'
                                             )]  # filter out binary options
                    filtered['option_type'] = filtered.option_type.str.lower()
                    filtered['mid_1545'] = (filtered['bid_1545'] +
                                            filtered['ask_1545']) / 2
                    filtered['underlying_mid_1545'] = (
                        filtered['underlying_bid_1545'] +
                        filtered['underlying_ask_1545']) / 2
                    filtered.to_sql(table,
                                    engine,
                                    if_exists='append',
                                    index=False)
                    db.commit()

            if unzippedpath != "":
                os.remove(unzippedpath)

    print()
    print("Done inserting data ")
    print()

    db.close()
Esempio n. 3
0
def insert(underlyings, dir, precompute):

    print("insert: " + str(underlyings))
    print()

    engine = create_engine("postgresql://" + settings.db_username + ":" +
                           settings.db_password + "@127.0.0.1/optiondata")
    table = 'optiondata'

    db = psycopg2.connect(host="localhost",
                          user=settings.db_username,
                          password=settings.db_password,
                          database="optiondata")
    cur2 = db.cursor()

    counter = 0
    dates = set()

    # traverse root directory, and list directories as dirs and files as files
    for root, dirs, files in os.walk(dir):

        for file in files:

            if file.endswith(".zip"):

                index = file.index('_') + 1
                datestring = file[index:(index + 10)]

                date = datetime.strptime(datestring, '%Y-%m-%d').date()
                print(date)

                unzippedpath = ""

                for underlying in underlyings:

                    if ((underlying in startdates) and
                        (date > startdates[underlying])) or (
                            underlying not in startdates):

                        query = "SELECT EXISTS(SELECT 1 FROM optiondata WHERE quote_date = '" + datestring + "' AND underlying_symbol = '" + underlying + "')"
                        cur2.execute(query)
                        row = cur2.fetchone()

                        if (row[0] == False):

                            counter += 1
                            print(
                                str(counter) + "\t" + datestring + "\t" +
                                underlying + "\t" + str(file))

                            if unzippedpath == "":
                                datafilepath = root + "/" + file
                                unzippedpath = util.unzip(datafilepath)

                            if unzippedpath is not None:

                                df = pd.read_csv(
                                    unzippedpath,
                                    header=0,
                                    dtype={
                                        "underlying_symbol": object,
                                        "quote_date": object,
                                        "root": object,
                                        "expiration": object,
                                        "strike": np.float64,
                                        "option_type": object,
                                        "open": np.float64,
                                        "high": np.float64,
                                        "low": np.float64,
                                        "close": np.float64,
                                        "trade_volume": np.int64,
                                        "bid_size_1545": np.int64,
                                        "bid_1545": np.float64,
                                        "ask_size_1545": np.int64,
                                        "ask_1545": np.float64,
                                        "underlying_bid_1545": np.float64,
                                        "underlying_ask_1545": np.float64,
                                        "bid_size_eod": np.int64,
                                        "bid_eod": np.float64,
                                        "ask_size_eod": np.int64,
                                        "ask_eod": np.float64,
                                        "underlying_bid_eod": np.float64,
                                        "underlying_ask_eod": np.float64,
                                        "vwap": object,
                                        "open_interest": np.float64,
                                        "delivery_code": object
                                    })

                                filtered = df[(
                                    df['underlying_symbol'] == underlying)]

                                if underlying == "^SPX":
                                    filtered = filtered[
                                        (filtered.root != 'BSZ') &
                                        (filtered.root !=
                                         'BSK')]  # filter out binary options

                                filtered[
                                    'option_type'] = filtered.option_type.str.lower(
                                    )
                                filtered['mid_1545'] = (
                                    filtered['bid_1545'] +
                                    filtered['ask_1545']) / 2
                                filtered['underlying_mid_1545'] = (
                                    filtered['underlying_bid_1545'] +
                                    filtered['underlying_ask_1545']) / 2

                                if len(filtered.index) > 0:
                                    print(str(len(filtered.index)))
                                    dates.add(datestring)
                                    filtered.to_sql(table,
                                                    engine,
                                                    if_exists='append',
                                                    index=False,
                                                    chunksize=1000)
                                    db.commit()

                # only if data hast been inserted, for all underlyings together
                if precompute:
                    precompute_bs_price.precompute("optiondata", date, "*",
                                                   True)
                    precompute_greeks.precompute("optiondata", date, "*", True)

                if ((unzippedpath != "") and (unzippedpath is not None)):
                    os.remove(unzippedpath)

    print("Done inserting data \n")

    db.close()
    return dates
def load(underlyings, dir, precompute_all):

    print("load: " + str(underlyings))
    print()

    engine = create_engine("postgresql://" + settings.db_username + ":" +
                           settings.db_password + "@127.0.0.1/optiondata")
    table = "optiondata"

    db = psycopg2.connect(
        host="localhost",
        user=settings.db_username,
        password=settings.db_password,
        database="optiondata",
    )
    cur2 = db.cursor()

    counter = 0
    dates_from_dir = set()
    dates_in_db = set()
    dates_to_load = set()

    # traverse root directory, and list directories as dirs and files as files
    # get all dates that need to be processed

    for root, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith(".zip"):
                index = file.index("_") + 1
                file_datestring = file[index:(index + 10)]
                dates_from_dir.add(file_datestring)

    for underlying in underlyings:

        if len(dates_from_dir) > 0:

            query = (
                "SELECT DISTINCT quote_date FROM optiondata WHERE underlying_symbol = '"
                + underlying + "' AND quote_date IN " +
                str(dates_from_dir).replace("{", "(").replace("}", ")") + "")

            cur2.execute(query)
            rows = cur2.fetchall()
            for row in rows:
                dates_in_db.add(str(row[0]))

            dates_to_load = dates_from_dir - dates_in_db
            print("dates_from_dir: " + str(len(dates_from_dir)))
            print("dates_in_db: " + str(len(dates_in_db)))
            print("dates_to_load: " + str(len(dates_to_load)))
            print()

            for date in sorted(dates_to_load):

                if ((underlying in util.startdates) and
                    (datetime.strptime(date, '%Y-%m-%d').date() >
                     util.startdates[underlying])) or (underlying
                                                       not in util.startdates):

                    unzippedpath = ""
                    counter += 1
                    file = "UnderlyingOptionsEODQuotes_" + str(date) + ".zip"
                    print(
                        str(counter) + "\t" + date + "\t" + underlying + "\t" +
                        file)
                    currentdir = dir
                    if currentdir.endswith("/optiondata/"):
                        currentdir = currentdir + date[0:4] + "/"

                    datafilepath = currentdir + file
                    unzippedpath = util.unzip(datafilepath)

                    if unzippedpath is not None:

                        df = pd.read_csv(
                            unzippedpath,
                            header=0,
                            dtype={
                                "underlying_symbol": object,
                                "quote_date": object,
                                "root": object,
                                "expiration": object,
                                "strike": np.float64,
                                "option_type": object,
                                "open": np.float64,
                                "high": np.float64,
                                "low": np.float64,
                                "close": np.float64,
                                "trade_volume": np.int64,
                                "bid_size_1545": np.int64,
                                "bid_1545": np.float64,
                                "ask_size_1545": np.int64,
                                "ask_1545": np.float64,
                                "underlying_bid_1545": np.float64,
                                "underlying_ask_1545": np.float64,
                                "bid_size_eod": np.int64,
                                "bid_eod": np.float64,
                                "ask_size_eod": np.int64,
                                "ask_eod": np.float64,
                                "underlying_bid_eod": np.float64,
                                "underlying_ask_eod": np.float64,
                                "vwap": object,
                                "open_interest": np.float64,
                                "delivery_code": object,
                            },
                        )

                        filtered = df[(df["underlying_symbol"] == underlying)]

                        if underlying == "^SPX":
                            filtered = filtered[(filtered.root != "BSZ") & (
                                filtered.root !=
                                "BSK")]  # filter out binary options

                        filtered[
                            "option_type"] = filtered.option_type.str.lower()
                        filtered["mid_1545"] = (filtered["bid_1545"] +
                                                filtered["ask_1545"]) / 2
                        filtered["underlying_mid_1545"] = (
                            filtered["underlying_bid_1545"] +
                            filtered["underlying_ask_1545"]) / 2

                        if len(filtered.index) > 0:
                            print(str(len(filtered.index)))
                            filtered.to_sql(
                                table,
                                engine,
                                if_exists="append",
                                index=False,
                                chunksize=1000,
                            )
                            db.commit()

                    if (unzippedpath != "") and (unzippedpath is not None):
                        os.remove(unzippedpath)

            if precompute_all:
                precompute_counter = 0
                for date_to_load in sorted(dates_to_load):
                    counter += 1
                    print(str(counter) + ": " + date_to_load)
                    precompute_greeks.precompute("optiondata", date_to_load,
                                                 underlying, True)

    print("Done loading data \n")

    db.close()
    return dates_to_load