def data_transform(start_date, end_date):
    """Transform raw deri-bit BTC dumps into per-contract CSV files.

    For each day in the half-open range ``[start_date, end_date)`` that has
    a raw-data directory under ``D:/deri-bit/<YYYYMMDD>/``, every BTC file
    is converted and written as an option / perpetual / future CSV under
    the directories provided by ``directory()``.  Files whose target CSV
    already exists are skipped, so reruns are incremental.

    Parameters
    ----------
    start_date, end_date : date-like
        Bounds understood by ``np.arange`` (end exclusive).
    """
    base_dir = "D:/deri-bit/"  # single source of truth for the raw-data root
    output_dir = directory()

    # Days for which a raw-data directory actually exists on disk.
    datelist = [datetime.datetime.strptime(t, '%Y%m%d').date()
                for t in os.listdir(base_dir)]

    for d in np.arange(start_date, end_date):
        date = np.datetime64(d).astype(datetime.date)
        if d not in datelist:
            continue

        day_dir = base_dir + str(date).replace("-", "")
        # Only BTC instruments are processed.
        arr = [day_dir + "/" + f
               for f in os.listdir(day_dir)
               if f.split("-")[0] == "BTC"]

        for f in arr:
            f_to_list = f.split('-')

            # Options: file names have more than 3 dash-separated parts.
            if len(f_to_list) > 3:
                # Raw string avoids the invalid-escape warning the original
                # '_(\d+)' pattern triggers on recent Python versions.
                cp_date = re.split(r'_(\d+)', f_to_list[4])
                optionsDir = (output_dir.data_download(type='option')
                              + cp_date[1] + '_' + f_to_list[2] + '_'
                              + f_to_list[3] + '_' + cp_date[0] + '.csv')
                if not os.path.exists(optionsDir):
                    contractType = f_to_list[2].split('_')[0]
                    if not contractType.startswith('PERP'):
                        options = get_data(f)
                        optionsDF = options.all_to_dataframe()
                        optionsDF.to_csv(path_or_buf=optionsDir)

            # Perpetuals and futures share the 3-part name layout and are
            # told apart by the PERP prefix of the contract type, so one
            # branch replaces the two duplicated len == 3 blocks.
            if len(f_to_list) == 3:
                cp_date = re.split(r'_(\d+)', f_to_list[2])
                contractType = f_to_list[2].split('_')[0]
                if contractType.startswith('PERP'):
                    perpetualDir = (output_dir.data_download(type='perpetual')
                                    + cp_date[1] + '_' + cp_date[0] + '.csv')
                    if not os.path.exists(perpetualDir):
                        perpetual = get_data(f)
                        perpetualDF = perpetual.to_dataframe()
                        perpetualDF.to_csv(path_or_buf=perpetualDir)
                else:
                    futuresDir = (output_dir.data_download(type='future')
                                  + cp_date[1] + '_' + cp_date[0] + '.csv')
                    if not os.path.exists(futuresDir):
                        futures = get_data(f)
                        futuresDF = futures.to_dataframe()
                        futuresDF.to_csv(path_or_buf=futuresDir)

            print('Finished: ', date, f)
def test_find_users_by_nationality():
    """Assert the correct list of users returned from the database when
    the function find_user_by_nationality is given various
    nationalities as input.
    """
    # Create test database and tables.  The local is named `test_database`
    # (not `database`) so it does not shadow the project `database` module
    # that the sibling tests in this file use.
    db_filename = 'test_find_user_by_nationality.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()

    users_filename = 'test_user_data.txt'
    user_data_packages = read_file.get_data(users_filename, package_size=3)
    user_data_rows = [create_database.unpack_user_data(user_data_package)
                      for user_data_package in user_data_packages]
    create_database.create_users_table(cursor, user_data_rows)

    # Check database for a nationality with 1 user.
    assert find_users_by_nationality('south korean', cursor) == ['orangelover1107']

    # Check database for a nationality with multiple users.
    assert find_users_by_nationality('american', cursor) == ['coit125', 'austinwiltshire']

    # Check database for a nationality with 0 users.
    assert not find_users_by_nationality('gondorian', cursor)

    # Check case-sensitivity.
    assert find_users_by_nationality('BRITISH', cursor) == ['bcummberbatch']

    # Release the file handle like the other tests do; the original
    # leaked the connection.
    test_database.close()
def test_create_date_formats_table():
    """Assert the correct contents of the date_formats table.

    Builds a fresh SQLite database from the test_date_formats.txt fixture
    and checks the stored (nationality, date_format) row for each
    nationality.
    """
    # Create database.
    db_filename = 'test_create_date_formats_table.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()

    # Each fixture package unpacks to one (nationality, date_format) row.
    date_formats_filename = 'test_date_formats.txt'
    date_format_data_packages = read_file.get_data(date_formats_filename, package_size=1)
    date_format_data_rows = [create_database.unpack_date_format_data(date_format_data_package)
                             for date_format_data_package in date_format_data_packages]
    create_database.create_date_formats_table(cursor, date_format_data_rows)

    # Check date_formats table.
    statement = """SELECT *
                       FROM date_formats
                       WHERE nationality =?"""
    # NOTE(review): `database` here is presumably a project helper module
    # (distinct from the local sqlite3 connection `test_database`) — verify.
    assert database.query(statement, 'american', cursor) == (u"american",
                                                             u"{month}/{day}/{year}")
    assert database.query(statement, 'british', cursor) == (u"british",
                                                            u"{day}/{month}/{year}")
    assert database.query(statement, 'south korean', cursor) == (u"south korean",
                                                                 u"{year}/{month}/{day}")

    test_database.close()
def test_create_users_table():
    """Assert the correct contents of the users table.

    Builds a fresh SQLite database from the test_user_data.txt fixture and
    checks the stored (username, nationality, birthdate) row for each user.
    """
    # Create database.
    db_filename = 'test_create_users_table.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()

    # Each fixture package of 3 lines unpacks to one user row.
    users_filename = 'test_user_data.txt'
    user_data_packages = read_file.get_data(users_filename, package_size=3)
    user_data_rows = [create_database.unpack_user_data(user_data_package)
                      for user_data_package in user_data_packages]
    create_database.create_users_table(cursor, user_data_rows)

    # Check users table.
    statement = """SELECT *
                       FROM users
                       WHERE username =?"""
    assert database.query(statement, 'coit125', cursor) == (u'coit125',
                                                            u'american',
                                                            u'01/25/1984')
    assert database.query(statement, 'bcummberbatch', cursor) == (u'bcummberbatch',
                                                                  u'british',
                                                                  u'04/21/1978')
    assert database.query(statement, 'orangelover1107', cursor) == (u'orangelover1107',
                                                                    u'south korean',
                                                                    u'11/07/1983')

    test_database.close()
def test_find_user():
    """Assert the correct return values of find_user() when given
    different username values to be found in the database.
    """
    # Build a throwaway database holding both test tables.
    connection = sqlite3.connect('test_find_user.sqlite')
    db_cursor = connection.cursor()

    user_packages = read_file.get_data('test_user_data.txt', package_size=3)
    create_database.create_users_table(
        db_cursor,
        [create_database.unpack_user_data(package) for package in user_packages])

    format_packages = read_file.get_data('test_date_formats.txt', package_size=1)
    create_database.create_date_formats_table(
        db_cursor,
        [create_database.unpack_date_format_data(package)
         for package in format_packages])

    def expected(username, nationality, birthdate):
        # Mirrors the three-line record layout find_user() renders.
        return ("username: %s\nnationality: %s\nbirthdate: %s"
                % (username, nationality, birthdate))

    # mm/dd/yyyy style user.
    assert find_user('coit125', db_cursor) == expected(
        'coit125', 'american', '1/25/1984')
    # dd/mm/yyyy style user.
    assert find_user('bcummberbatch', db_cursor) == expected(
        'bcummberbatch', 'british', '21/4/1978')
    # yyyy/mm/dd style user.
    assert find_user('orangelover1107', db_cursor) == expected(
        'orangelover1107', 'south korean', '1983/11/7')
    # Lookups are case-insensitive.
    assert find_user('COIT125', db_cursor) == expected(
        'coit125', 'american', '1/25/1984')
    # Unknown users yield a falsy result.
    assert not find_user('margesimpson', db_cursor)

    connection.close()
# Example #6 (score: 0) — scrape artifact converted to a comment
import chainer.links as L
from chainer import optimizers

from read_file import get_data
from parameters import Parameters
from Model import CNNModel

if __name__ == u'__main__':
    # Command-line args: optional GPU device id and the training-data file.
    # NOTE(review): `argparse`, `np`, and `cuda` are presumably imported
    # earlier in the file, outside this excerpt — confirm.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=None, type=int)
    parser.add_argument('--datafile', '-d', default=None, type=str)
    args = parser.parse_args()

    # get data
    label_data, train_data = get_data(args.datafile)

    # params
    params = Parameters()

    # Set models: input dimensions come from the first training sample;
    # the output class count is the largest label value + 1.
    cnn_model = CNNModel(len(train_data[0]), len(train_data[0][0]),
                         params.filter_size, max(label_data) + 1)
    objective = L.Classifier(cnn_model)
    # Skip accuracy computation during training (loss only).
    objective.compute_accuracy = False

    # For CUDA: pick the array backend matching the requested device.
    if args.gpu is None:
        xp = np
    else:
        xp = cuda.cupy
# Example #7 (score: 0) — scrape artifact converted to a comment
    # Initiate the underlying values
    # NOTE(review): fragment of a larger function — `date`, `output_dir`,
    # `DataClient`, `get_data`, `re`, and `os` come from outside this excerpt.
    # SECURITY: the SSH host, user, and password are hard-coded below; move
    # them to configuration or environment variables and rotate the password.
    client = DataClient('3.17.63.79', 22, 'ec2-user', 'Waa9b25xyk')
    client.connect()
    # All BTC files on the remote host for the given day (YYYYMMDD).
    arr = client.getfiles(str(date).replace("-", ""), 'BTC')
    for f in arr:
        f_to_list = f.split('-')
        #  Options: names with more than 3 dash-separated fields.
        if len(f_to_list) > 3:
            cp_date = re.split('_(\d+)', f_to_list[4])
            optionsDir = output_dir.data_download(
                type='option') + cp_date[1] + '_' + f_to_list[
                    2] + '_' + f_to_list[3] + '_' + cp_date[0] + '.csv'
            # Skip files whose target CSV already exists (incremental rerun).
            if not os.path.exists(optionsDir):
                contractType = f_to_list[2].split('_')[0]
                if not contractType.startswith('PERP'):
                    options = get_data(f)
                    optionsDF = options.all_to_dataframe()
                    optionsDF.to_csv(path_or_buf=optionsDir)

        # Perpetuals: 3-part names whose contract type starts with PERP.
        if len(f_to_list) == 3:
            cp_date = re.split('_(\d+)', f_to_list[2])
            perpetualDir = output_dir.data_download(
                type='perpetual') + cp_date[1] + '_' + cp_date[0] + '.csv'
            if not os.path.exists(perpetualDir):
                contractType = f_to_list[2].split('_')[0]
                if contractType.startswith('PERP'):
                    perpetual = get_data(f)
                    perpetualDF = perpetual.to_dataframe()
                    perpetualDF.to_csv(path_or_buf=perpetualDir)
    # Insert data into the date_formats table.
    # NOTE(review): fragment — `data_rows` and `db_cursor` come from the
    # enclosing (unseen) function, presumably create_date_formats_table.
    for data_row in data_rows:
        # Falsy (empty) rows are skipped silently.
        if data_row:
            # Each row must be exactly (nationality, date_format).
            assert len(data_row) == 2, "Wrong amount of information in the data row."
            db_cursor.execute("""INSERT INTO date_formats(nationality, date_format)
                              VALUES (?,?)""", (data_row))

if __name__ == "__main__":
    # Create a new database and connect to it.
    DB_FILENAME = 'database.sqlite' # Insert desired database filename here.
    DATABASE = sqlite3.connect(DB_FILENAME)
    DB_CURSOR = DATABASE.cursor()

    # Create 'users' table in database.  Each user record spans 3 lines in
    # the input file, hence package_size=3.
    USERS_FILENAME = 'user_data.txt' # Insert user filename here.
    USER_DATA_PACKAGES = read_file.get_data(USERS_FILENAME, package_size=3)
    USER_DATA_ROWS = [unpack_user_data(USER_DATA_PACKAGE)
                      for USER_DATA_PACKAGE in USER_DATA_PACKAGES]
    create_users_table(DB_CURSOR, USER_DATA_ROWS)

    # Create 'date_formats' table in database (one line per record).
    DATE_FORMATS_FILENAME = 'date_formats.txt' # Insert format filename here.
    DATE_FORMAT_DATA_PACKAGES = read_file.get_data(DATE_FORMATS_FILENAME, package_size=1)
    DATE_FORMAT_DATA_ROWS = [unpack_date_format_data(DATE_FORMAT_DATA_PACKAGE)
                             for DATE_FORMAT_DATA_PACKAGE in DATE_FORMAT_DATA_PACKAGES]
    create_date_formats_table(DB_CURSOR, DATE_FORMAT_DATA_ROWS)

    # Save and close the database.
    DATABASE.commit()
    DATABASE.close()
# Example #9 (score: 0) — scrape artifact converted to a comment
            # NOTE(review): fragment — the enclosing plotting function and
            # `f`, `xnew`, `x`, `y`, `headings`, `i`, `scale`, `ax`, `fig`,
            # and `output` are defined outside this excerpt.
            ynew = f(xnew)
            ax.plot(x, y, 'o')
            ax.plot(xnew, ynew, label=headings[i], linewidth=2)
        elif scale == "log":
            # Log scale: plot raw points only, no interpolated curve.
            ax.plot(x, y, 'o', label=headings[i])

    ax.xaxis.label.set_fontsize(24)
    ax.yaxis.label.set_fontsize(24)

    ax.legend(ncol=2, frameon=False, fontsize=20)
    plt.show()
    fig.savefig(output)


if __name__ == '__main__':
    # Load the timing table and render it at three scales.
    titles, data = read_file.get_data("timings")
    data = data.astype(float)

    # Transpose so each row becomes one plottable series.
    data = np.transpose(data)
    timings_plot(titles,
                 data,
                 y_min=0,
                 y_max=700,
                 output="timings_smallscale.pdf")
    timings_plot(titles,
                 data,
                 y_min=0,
                 y_max=3500,
                 output="timings_largescale.pdf")
    # Log-scale variant covers the full range in one figure.
    timings_plot(titles, data, scale="log", output="timings_log.pdf")
import bar_plot
import read_file
import numpy as np

if __name__ == '__main__':
    # Plot delta-SCF mean errors and MAE for singlets and triplets.
    filename = "deltascf"
    titles, data = read_file.get_data(filename)
    data_labels = ['Singlets', 'Triplets']

    # Pair singlet and triplet columns side by side for grouped bars.
    # NOTE(review): assumes data rows are ordered (singlet mean, singlet MAE,
    # triplet mean, triplet MAE) — confirm against the data file.
    mean_errors = np.column_stack((data[0], data[2]))
    mae = np.column_stack((data[1], data[3]))

    bar_plot.stacked_bar_graphs(titles, data_labels, mean_errors, mae,
                                "deltascf.pdf", -0.4, 1, 2, 3)
# Example #11 (score: 0) — scrape artifact converted to a comment
    # NOTE(review): fragment — `ax`, `y_min`, `y_max`, `fig`, and
    # `output_filename` belong to the enclosing (unseen) plotting function.
    ax.set_ylim(y_min, y_max)
    ax.set_ylabel('Errors / $eV$')

    ax.xaxis.label.set_fontsize(24)
    ax.yaxis.label.set_fontsize(24)
    ax.tick_params(axis='both', which='major', labelsize=20, pad=10)
    # One decimal place on the y-axis ticks.
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

    # Create legend & Show graphic
    plt.legend(fontsize=20,
               loc=4,
               ncol=2,
               frameon=False,
               columnspacing=0.2,
               borderaxespad=-0.2)
    plt.show()
    fig.savefig(output_filename)


if __name__ == '__main__':
    # Compare two datasets: barrier heights vs. reaction energies.
    # Input file names are taken positionally from the command line.
    filename1 = sys.argv[1]
    filename2 = sys.argv[2]
    titles1, data1 = read_file.get_data(filename1)
    titles2, data2 = read_file.get_data(filename2)
    data_labels = ['Barrier Heights', 'Reaction Energies']

    # Row 0 of each file = mean error; row 1 = MAE — paired for grouped bars.
    mean_errors = np.column_stack((data1[0], data2[0]))
    mae = np.column_stack((data1[1], data2[1]))

    stacked_bar_graphs(titles1, data_labels, mean_errors, mae, "reactions.pdf")