def data_transform(start_date, end_date):
    """Convert raw deri-bit BTC dumps into per-contract CSV files.

    For every date in ``[start_date, end_date)`` that has a raw-data folder
    under ``D:/deri-bit/`` (folder names are YYYYMMDD), write one CSV per
    option / perpetual / future contract into the directories returned by
    ``directory().data_download(type=...)``.

    Parameters
    ----------
    start_date, end_date : bounds accepted by ``np.arange`` over dates
        (e.g. ``numpy.datetime64``); ``end_date`` is exclusive.

    Side effects: reads the local filesystem and writes CSV files; skips
    any output file that already exists.
    """
    output_dir = directory()
    # Dates for which a raw-data directory exists.
    datelist = []
    for entry in os.listdir("D:/deri-bit/"):
        datelist.append(datetime.datetime.strptime(entry, '%Y%m%d').date())

    for d in np.arange(start_date, end_date):
        date = np.datetime64(d).astype(datetime.date)
        # Initiate the underlying values
        if d not in datelist:
            continue
        date_str = str(date).replace("-", "")
        day_dir = "D:/deri-bit/" + date_str
        # Only BTC-denominated files are processed.
        arr = [day_dir + "/" + name
               for name in os.listdir(day_dir)
               if name.split("-")[0] == "BTC"]
        for f in arr:
            f_to_list = f.split('-')
            # Options: names with extra strike/expiry fields (> 3 parts).
            if len(f_to_list) > 3:
                # Raw string: '\d' in a plain literal is an invalid escape
                # (SyntaxWarning on modern Python). The capturing group keeps
                # the numeric suffix as cp_date[1].
                cp_date = re.split(r'_(\d+)', f_to_list[4])
                optionsDir = (output_dir.data_download(type='option')
                              + cp_date[1] + '_' + f_to_list[2] + '_'
                              + f_to_list[3] + '_' + cp_date[0] + '.csv')
                if not os.path.exists(optionsDir):
                    contractType = f_to_list[2].split('_')[0]
                    if not contractType.startswith('PERP'):
                        options = get_data(f)
                        optionsDF = options.all_to_dataframe()
                        optionsDF.to_csv(path_or_buf=optionsDir)
            # Perpetuals: exactly 3 parts and a PERP* contract type.
            if len(f_to_list) == 3:
                cp_date = re.split(r'_(\d+)', f_to_list[2])
                # NOTE(review): cp_date[1] assumes the digit group matched;
                # a name without '_<digits>' would raise IndexError before
                # the PERP check below — confirm the expected file format.
                perpetualDir = (output_dir.data_download(type='perpetual')
                                + cp_date[1] + '_' + cp_date[0] + '.csv')
                if not os.path.exists(perpetualDir):
                    contractType = f_to_list[2].split('_')[0]
                    if contractType.startswith('PERP'):
                        perpetual = get_data(f)
                        perpetualDF = perpetual.to_dataframe()
                        perpetualDF.to_csv(path_or_buf=perpetualDir)
            # Futures: 3 parts and a non-PERP contract type.
            if len(f_to_list) == 3:
                cp_date = re.split(r'_(\d+)', f_to_list[2])
                futuresDir = (output_dir.data_download(type='future')
                              + cp_date[1] + '_' + cp_date[0] + '.csv')
                if not os.path.exists(futuresDir):
                    contractType = f_to_list[2].split('_')[0]
                    if not contractType.startswith('PERP'):
                        futures = get_data(f)
                        futuresDF = futures.to_dataframe()
                        futuresDF.to_csv(path_or_buf=futuresDir)
            print('Finished: ', date, f)
def test_find_users_by_nationality():
    """Assert the correct list of users returned from the database when
    the function find_users_by_nationality is given various nationalities
    as input.
    """
    # Create test database and tables.
    db_filename = 'test_find_user_by_nationality.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()
    users_filename = 'test_user_data.txt'
    user_data_packages = read_file.get_data(users_filename, package_size=3)
    user_data_rows = [create_database.unpack_user_data(user_data_package)
                      for user_data_package in user_data_packages]
    create_database.create_users_table(cursor, user_data_rows)
    # Check database for a nationality with 1 user.
    assert find_users_by_nationality('south korean', cursor) == ['orangelover1107']
    # Check database for a nationality with multiple users.
    assert find_users_by_nationality('american', cursor) == ['coit125', 'austinwiltshire']
    # Check database for a nationality with 0 users.
    assert not find_users_by_nationality('gondorian', cursor)
    # Check case-sensitivity.
    assert find_users_by_nationality('BRITISH', cursor) == ['bcummberbatch']
    # Close the connection, matching the other tests in this file; without
    # this the .sqlite file handle leaks for the rest of the test run.
    test_database.close()
def test_create_date_formats_table():
    """Assert the correct contents of the date_formats table."""
    # Create database.
    db_filename = 'test_create_date_formats_table.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()
    date_formats_filename = 'test_date_formats.txt'
    date_format_data_packages = read_file.get_data(date_formats_filename,
                                                   package_size=1)
    date_format_data_rows = [create_database.unpack_date_format_data(date_format_data_package)
                             for date_format_data_package in date_format_data_packages]
    create_database.create_date_formats_table(cursor, date_format_data_rows)
    # Check date_formats table: one row per nationality with its template.
    statement = """SELECT * FROM date_formats WHERE nationality =?"""
    assert database.query(statement, 'american', cursor) == (u"american", u"{month}/{day}/{year}")
    assert database.query(statement, 'british', cursor) == (u"british", u"{day}/{month}/{year}")
    assert database.query(statement, 'south korean', cursor) == (u"south korean", u"{year}/{month}/{day}")
    test_database.close()
def test_create_users_table():
    """Assert the correct contents of the users table."""
    # Create database.
    db_filename = 'test_create_users_table.sqlite'
    test_database = sqlite3.connect(db_filename)
    cursor = test_database.cursor()
    users_filename = 'test_user_data.txt'
    user_data_packages = read_file.get_data(users_filename, package_size=3)
    user_data_rows = [create_database.unpack_user_data(user_data_package)
                      for user_data_package in user_data_packages]
    create_database.create_users_table(cursor, user_data_rows)
    # Check users table: (username, nationality, birthdate) per row.
    statement = """SELECT * FROM users WHERE username =?"""
    assert database.query(statement, 'coit125', cursor) == (u'coit125', u'american', u'01/25/1984')
    assert database.query(statement, 'bcummberbatch', cursor) == (u'bcummberbatch', u'british', u'04/21/1978')
    assert database.query(statement, 'orangelover1107', cursor) == (u'orangelover1107', u'south korean', u'11/07/1983')
    test_database.close()
def test_find_user(): """Assert the correct return values of find_user() when given different username values to be found in the database. """ # Create test database and tables. db_filename = 'test_find_user.sqlite' test_database = sqlite3.connect(db_filename) cursor = test_database.cursor() users_filename = 'test_user_data.txt' user_data_packages = read_file.get_data(users_filename, package_size=3) user_data_rows = [create_database.unpack_user_data(user_data_package) for user_data_package in user_data_packages] create_database.create_users_table(cursor, user_data_rows) date_formats_filename = 'test_date_formats.txt' date_format_data_packages = read_file.get_data(date_formats_filename, package_size=1) date_format_data_rows = [create_database.unpack_date_format_data(date_format_data_package) for date_format_data_package in date_format_data_packages] create_database.create_date_formats_table(cursor, date_format_data_rows) template = """username: %s nationality: %s birthdate: %s""" # Check mm/dd/yyyy style user. assert find_user('coit125', cursor) == template % ('coit125', 'american', '1/25/1984') # Check dd/mm/yyyy style user. assert find_user('bcummberbatch', cursor) == template % ('bcummberbatch', 'british', '21/4/1978') # Check yyyy/mm/dd style user. assert find_user('orangelover1107', cursor) == template % ('orangelover1107', 'south korean', '1983/11/7') # Check case-sensitivity. assert find_user('COIT125', cursor) == template % ('coit125', 'american', '1/25/1984') # Check non-user. assert not find_user('margesimpson', cursor) test_database.close()
import chainer.links as L
from chainer import optimizers
from read_file import get_data
from parameters import Parameters
from Model import CNNModel

if __name__ == u'__main__':
    # args: optional GPU id and path to the training-data file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=None, type=int)
    parser.add_argument('--datafile', '-d', default=None, type=str)
    args = parser.parse_args()
    # get data
    label_data, train_data = get_data(args.datafile)
    # params
    params = Parameters()
    # set models: CNN dimensions taken from the first training example
    # (len(train_data[0]) x len(train_data[0][0])); number of output
    # classes is max(label_data) + 1.
    cnn_model = CNNModel(len(train_data[0]), len(train_data[0][0]),
                         params.filter_size, max(label_data) + 1)
    objective = L.Classifier(cnn_model)
    # Disable the classifier's accuracy computation (loss only).
    objective.compute_accuracy = False
    # for cuda: select numpy or cupy as the array module.
    # NOTE(review): `argparse`, `np`, and `cuda` are not imported in this
    # chunk — presumably imported earlier in the file; verify.
    if args.gpu is None:
        xp = np
    else:
        xp = cuda.cupy
# Initiate the underlying values client = DataClient('3.17.63.79', 22, 'ec2-user', 'Waa9b25xyk') client.connect() arr = client.getfiles(str(date).replace("-", ""), 'BTC') for f in arr: f_to_list = f.split('-') # Options: if len(f_to_list) > 3: cp_date = re.split('_(\d+)', f_to_list[4]) optionsDir = output_dir.data_download( type='option') + cp_date[1] + '_' + f_to_list[ 2] + '_' + f_to_list[3] + '_' + cp_date[0] + '.csv' if not os.path.exists(optionsDir): contractType = f_to_list[2].split('_')[0] if not contractType.startswith('PERP'): options = get_data(f) optionsDF = options.all_to_dataframe() optionsDF.to_csv(path_or_buf=optionsDir) # Perpetuals if len(f_to_list) == 3: cp_date = re.split('_(\d+)', f_to_list[2]) perpetualDir = output_dir.data_download( type='perpetual') + cp_date[1] + '_' + cp_date[0] + '.csv' if not os.path.exists(perpetualDir): contractType = f_to_list[2].split('_')[0] if contractType.startswith('PERP'): perpetual = get_data(f) perpetualDF = perpetual.to_dataframe() perpetualDF.to_csv(path_or_buf=perpetualDir)
# Insert data into the date_formats table. for data_row in data_rows: if data_row: assert len(data_row) == 2, "Wrong amount of information in the data row." db_cursor.execute("""INSERT INTO date_formats(nationality, date_format) VALUES (?,?)""", (data_row)) if __name__ == "__main__": # Create a new database and connect to it. DB_FILENAME = 'database.sqlite' # Insert desired database filename here. DATABASE = sqlite3.connect(DB_FILENAME) DB_CURSOR = DATABASE.cursor() # Create 'users' table in database. USERS_FILENAME = 'user_data.txt' # Insert user filename here. USER_DATA_PACKAGES = read_file.get_data(USERS_FILENAME, package_size=3) USER_DATA_ROWS = [unpack_user_data(USER_DATA_PACKAGE) for USER_DATA_PACKAGE in USER_DATA_PACKAGES] create_users_table(DB_CURSOR, USER_DATA_ROWS) # Create 'date_formats' table in database. DATE_FORMATS_FILENAME = 'date_formats.txt' # Insert format filename here. DATE_FORMAT_DATA_PACKAGES = read_file.get_data(DATE_FORMATS_FILENAME, package_size=1) DATE_FORMAT_DATA_ROWS = [unpack_date_format_data(DATE_FORMAT_DATA_PACKAGE) for DATE_FORMAT_DATA_PACKAGE in DATE_FORMAT_DATA_PACKAGES] create_date_formats_table(DB_CURSOR, DATE_FORMAT_DATA_ROWS) # Save and close the database. DATABASE.commit() DATABASE.close()
# NOTE(review): chunk tail — `f`, `xnew`, `x`, `y`, `ax`, `scale`,
# `headings`, `i`, `fig`, and `output` belong to the (unseen) enclosing
# plotting function; the `elif` below pairs with an `if` above this chunk.
# Relative indentation reconstructed from mangled source — verify.
        # Linear-scale branch: raw points plus the interpolated curve.
        ynew = f(xnew)
        ax.plot(x, y, 'o')
        ax.plot(xnew, ynew, label=headings[i], linewidth=2)
    elif scale == "log":
        # Log scale: raw points only, no interpolation.
        ax.plot(x, y, 'o', label=headings[i])
    ax.xaxis.label.set_fontsize(24)
    ax.yaxis.label.set_fontsize(24)
    ax.legend(ncol=2, frameon=False, fontsize=20)
    plt.show()
    fig.savefig(output)

if __name__ == '__main__':
    # Load the timing table, convert to float, and plot at three scales.
    titles, data = read_file.get_data("timings")
    data = data.astype(float)
    data = np.transpose(data)
    timings_plot(titles, data, y_min=0, y_max=700, output="timings_smallscale.pdf")
    timings_plot(titles, data, y_min=0, y_max=3500, output="timings_largescale.pdf")
    timings_plot(titles, data, scale="log", output="timings_log.pdf")
import bar_plot
import read_file
import numpy as np

if __name__ == '__main__':
    # Load the delta-SCF results and plot singlet/triplet error bars.
    filename = "deltascf"
    column_titles, results = read_file.get_data(filename)
    series_labels = ['Singlets', 'Triplets']
    # Pair singlet/triplet columns: rows 0 and 2 hold mean errors,
    # rows 1 and 3 hold mean absolute errors.
    signed_errors = np.column_stack((results[0], results[2]))
    absolute_errors = np.column_stack((results[1], results[3]))
    bar_plot.stacked_bar_graphs(column_titles, series_labels,
                                signed_errors, absolute_errors,
                                "deltascf.pdf", -0.4, 1, 2, 3)
# NOTE(review): chunk tail — `ax`, `y_min`, `y_max`, `fig`, and
# `output_filename` belong to the (unseen) enclosing stacked_bar_graphs
# function.
ax.set_ylim(y_min, y_max)
ax.set_ylabel('Errors / $eV$')
ax.xaxis.label.set_fontsize(24)
ax.yaxis.label.set_fontsize(24)
ax.tick_params(axis='both', which='major', labelsize=20, pad=10)
# One-decimal-place tick labels on the y axis.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
# Create legend & Show graphic
plt.legend(fontsize=20, loc=4, ncol=2, frameon=False, columnspacing=0.2,
           borderaxespad=-0.2)
plt.show()
fig.savefig(output_filename)

if __name__ == '__main__':
    # Two input files given on the command line: barrier heights and
    # reaction energies.
    filename1 = sys.argv[1]
    filename2 = sys.argv[2]
    titles1, data1 = read_file.get_data(filename1)
    titles2, data2 = read_file.get_data(filename2)
    data_labels = ['Barrier Heights', 'Reaction Energies']
    # Stack row 0 (mean errors) and row 1 (MAE) from each file side by side.
    mean_errors = np.column_stack((data1[0], data2[0]))
    mae = np.column_stack((data1[1], data2[1]))
    stacked_bar_graphs(titles1, data_labels, mean_errors, mae, "reactions.pdf")