def main():
    """Delete the stored ML model selected by the command-line arguments.

    Expects exactly three arguments: ``<instanceType>``,
    ``<productDescription>`` and ``<region>``. The model is removed through
    ``MLModel.delete_model`` and its "trained" marker is cleared with
    ``unmark_trained_spots``.

    A wrong argument count prints the usage text and exits with status 0
    (status kept from the original script). A failure while deleting is
    reported on stdout, with the cause on stderr, and is not re-raised.
    """
    if len(sys.argv) != 4:
        print(
            "Three Arguments needed! How to: python3 delete_ml_model.py <instanceType> <productDescription> <region>"
        )
        sys.exit(0)

    instance_type = str(sys.argv[1])
    product_description = str(sys.argv[2])
    region = str(sys.argv[3])

    try:
        mlobj = MLModel(None, None, instance_type,
                        replace_name(product_description), region)
        mlobj.delete_model()
        unmark_trained_spots(instance_type, product_description, region)
        print('Ml model %s %s deleted' % (instance_type, product_description))
    except Exception as exc:
        # Best-effort script: report and carry on, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
        print('Could not delete model %s %s' %
              (instance_type, product_description))
        print('Reason: %r' % (exc,), file=sys.stderr)
def stock_predict(stock, quotes, divide_date):
    """Train a model on quotes before ``divide_date`` and evaluate it on the rest.

    :param stock: stock identifier; used in printed output and passed to
        ``MLModel`` / ``stock_operation``.
    :param quotes: pandas DataFrame with at least the columns ``tradeDate``,
        ``closePrice``, ``turnoverVol``, ``turnoverValue`` and
        ``negMarketValue``.
    :param divide_date: rows with ``tradeDate < divide_date`` form the
        training period, the remaining rows the test period.
    :return: None. Results are printed and forwarded to ``stock_operation``.

    Any exception raised while processing is caught and printed so that one
    failing stock does not stop a batch run.
    """
    try:
        print("开始 :", stock, ", 时间 :", time.ctime())
        quotes = quotes.sort_values(by=['tradeDate'], ascending=True)
        # Only the number of training rows is needed, so the filtered frame
        # does not have to be sorted again.
        train_quotes = quotes[quotes['tradeDate'] < divide_date]

        time_steps = 5  # length of each training sample
        # Minus one: the first day has no previous close to compute a change.
        train_days = len(train_quotes) - 1

        close_total = np.array(quotes['closePrice'])
        # Day-over-day percentage change; element i is the change from day i
        # to day i + 1.
        change_total = np.diff(close_total) / close_total[:-1] * 100
        turnover_vol_total = np.array(quotes['turnoverVol'])
        turnover_value_total = np.array(quotes['turnoverValue']) / 10000
        negmarket_value_total = np.array(quotes['negMarketValue']) / 1000000

        # The [1:] slices align the per-day series with change_total, which
        # has one element fewer than the raw quotes.
        x_total, y_total = generate_x_and_y(
            time_steps, change_total, change_total,
            turnover_vol_total[1:], turnover_value_total[1:],
            negmarket_value_total[1:])

        split = train_days - time_steps
        x_train, x_test = x_total[0:split], x_total[split:]
        y_train = y_total[0:split]
        # argmax index minus 2 is the class label
        # (presumably -2..2 for five classes; confirm in generate_x_and_y).
        y_test = np.argmax(y_total[split:], axis=1) - 2

        # Build the model
        model = MLModel(input_shape=(x_train.shape[1], x_train.shape[2]),
                        stock_name=stock)
        # Train the model
        model.train_model(x_train=x_train, y_train=y_train, epoch=5,
                          batch_size=32)
        # Predict
        y_predict = np.argmax(model.predict(x_test), axis=1) - 2
        print("y_predict: ", y_predict)

        test_count = y_test.shape[0]
        if test_count == 0:
            # Previously surfaced as an opaque "division by zero".
            raise ValueError(
                "no test samples for %s on or after %r" % (stock, divide_date))

        compared = y_predict[:test_count]
        # Exact class match.
        correct_num = int(np.count_nonzero(compared == y_test))
        # Direction match: both up, both down, or both flat.
        rough_correct_num = int(
            np.count_nonzero(np.sign(compared) == np.sign(y_test)))

        print(stock, "准确的大涨、小涨、震荡、小跌、大跌的预测准确率:",
              correct_num / test_count)
        print(stock, "涨跌平的预测准确率:", rough_correct_num / test_count)

        stock_operation(
            stock_name=stock,
            change=change_total[train_days:],
            close=close_total[train_days:],
            # turnover value (rescaled back) divided by volume = mean price
            mean=(turnover_value_total * 10000 /
                  turnover_vol_total)[train_days + 1:],
            predict_state=y_predict)
        print("结束 :", stock, ", =====================================时间 :", time.ctime())
    except Exception as e:
        # Deliberate best-effort: report and let the caller continue with
        # the next stock.
        print(e)
def main():
    """Train and save one model per availability zone.

    Command line: ``<instanceType> <productDescription> <region> <1|2>``,
    where the last argument selects a test run (1) or an actual run (2).

    The training CSV is generated by ``GenerateTrainingData`` under
    ``../backend/training_data/`` relative to the current directory. If
    generation reports failure the function returns without training.
    With region ``worldwide`` every zone in the CSV is trained, otherwise
    only zones whose name contains ``region``. A zone whose training fails
    is skipped. For an actual run (2) the pair is finally recorded through
    ``mark_trained_spots``.

    Wrong argument count or an invalid run mode prints a message and exits
    with status 0 (status kept from the original script).
    """
    if len(sys.argv) != 5:
        print(
            "Four Arguments needed! How to: python3 train_ml_model.py <instanceType> <productDescription> <region> <test run = 1 or actual run = 2>"
        )
        sys.exit(0)

    version = int(sys.argv[4])
    if version not in (1, 2):
        print("Last argument has to be 1 or 2")
        sys.exit(0)

    instance_type = str(sys.argv[1])
    product_description = str(sys.argv[2])
    region = str(sys.argv[3])
    # Loop-invariant: computed once instead of once per zone.
    rep_product_description = replace_name(product_description)

    # Parent directory of the current working directory.
    path = os.path.normpath(os.getcwd() + os.sep + os.pardir)
    training_file = (path + '/backend/training_data/' + instance_type + '_' +
                     rep_product_description + '_v1.csv')

    gen = GenerateTrainingData(training_file)
    if not gen.generate(instance_type, product_description):
        return

    df = pd.read_csv(training_file, sep=',')
    all_zones = df['AvailabilityZone'].drop_duplicates().values
    if region == 'worldwide':
        zones = all_zones
    else:
        zones = [s for s in all_zones if region in s]
    print(zones)

    for zone in zones:
        try:
            print('Train AvailabilityZone: ' + str(zone))
            prefix = (instance_type + '_' + rep_product_description + '_' +
                      str(zone))
            architecture_name = prefix + '_architecture.json'
            weights_name = prefix + '_weights.h5'

            mlobj = MLModel(weights_name, architecture_name, instance_type,
                            rep_product_description, region)
            model = mlobj.getModel()
            model.compile(optimizer='nadam',
                          loss='mean_squared_error',
                          metrics=['accuracy'])

            # The returned scaler is not needed for training.
            training_features, labels, _scaler = mlobj.generate_training_data(
                df, zone, version, 1)
            model = mlobj.train(model, training_features, labels)
            mlobj.save_model(model)
        except Exception as exc:
            # Skip a failing zone but keep the reason visible; Ctrl-C is no
            # longer swallowed as it was by the bare ``except:``.
            print('Skip AvailabilityZone:' + str(zone))
            print('Reason: %r' % (exc,), file=sys.stderr)

    print('Trained:', instance_type, product_description)
    if version == 2:
        mark_trained_spots(instance_type, product_description, region)
def _hit_rate_percent(hits, total):
    """Return ``hits / total * 100``, or NaN when ``total`` is zero."""
    if total == 0:
        return float('nan')
    return hits / total * 100


def _run_window_backtest(stock, window_len, time_steps, flat_num, close,
                         change, mean, data):
    """Retrain per window, simulate trading on each prediction, log and plot."""
    logging.info("stock:" + stock + ",window_len:" + str(window_len) +
                 ",time_steps:" + str(time_steps) + ",flat_num:" +
                 str(flat_num))
    program_start_time = time.time()

    buy_fee = 0.00032   # fee factor applied when buying
    sell_fee = 0.00132  # fee factor applied when selling

    # Starting point of the simulated curves: the close of the day before the
    # first predicted day.
    base_money = close[window_len - 1]  # running balance without fees
    base_money_fee = base_money         # running balance with fees
    base = 1                            # running return factor without fees
    base_fee = base                     # running return factor with fees
    model_line = [base_money]           # balance per day, without fees
    model_line_fee = [base_money]       # balance per day, with fees
    holding = True                      # the simulation starts holding the stock

    buy_num = hold_num = sell_num = empty_num = 0
    # Correct predictions per class and actual occurrences per class.
    up_num = down_num = medium_num = 0
    actual_up_num = actual_down_num = actual_medium_num = 0
    # Same idea, but judged on the resulting operation versus the real change.
    predict_up_num_2 = predict_down_num_2 = 0
    actual_up_num_2 = actual_down_num_2 = 0
    predict_result = []
    actual_result = []

    total_rounds = close.shape[0] - window_len
    split = window_len - time_steps
    for i in range(total_rounds):  # slide the window one day at a time
        once_start_time = time.time()
        day = i + window_len  # index of the day being predicted

        x_total, y_total = generate_x_and_y_3(flat_num, time_steps,
                                              change[i:day + 1],
                                              data[i:day + 1])
        x_train, x_test = x_total[0:split], x_total[split:]
        y_train = y_total[0:split]
        # argmax index minus 1 is the class: -1 down, 0 flat, 1 up.
        y_test = np.argmax(y_total[split:], axis=1) - 1
        actual_state = y_test[0]
        actual_result.append(actual_state)

        # A fresh model is built and trained for every window.
        model = MLModel(input_shape=(x_train.shape[1], x_train.shape[2]),
                        stock_name=stock)
        model.train_model(x_train=x_train, y_train=y_train, epoch=10,
                          batch_size=4, verbose=0)  # verbose=0: no progress output
        y_predict = np.argmax(model.predict(x_test), axis=1) - 1
        predict_state = y_predict[0]
        predict_result.append(predict_state)

        # Operation-level statistics. These must use the holding state from
        # BEFORE today's trade, so they come before the trading step.
        if change[day] >= 0:
            actual_up_num_2 += 1
            if (predict_state > 0 and not holding) or (predict_state >= 0
                                                       and holding):
                predict_up_num_2 += 1
        else:
            actual_down_num_2 += 1
            if (predict_state < 0 and holding) or (predict_state <= 0
                                                   and not holding):
                predict_down_num_2 += 1

        # Class-level statistics.
        if actual_state > 0:
            actual_up_num += 1
            if predict_state > 0:
                up_num += 1
        elif actual_state < 0:
            actual_down_num += 1
            if predict_state < 0:
                down_num += 1
        elif actual_state == 0:
            actual_medium_num += 1
            if predict_state == 0:
                medium_num += 1

        # Trading step.
        if predict_state > 0 and not holding:
            # Predicted up while not holding: buy.
            holding = True
            buy_num += 1
            # Today's mean price relative to yesterday's close.
            rate_temp = (mean[day] - close[day - 1]) / close[day - 1]
            base = base * (1 + rate_temp)
            base_money = base_money * (1 + rate_temp)
            base_fee = base_fee * (1 + rate_temp) * (1 - buy_fee)
            base_money_fee = base_money_fee * (1 + rate_temp) * (1 - buy_fee)
        elif predict_state < 0 and holding:
            # Predicted down while holding: sell.
            holding = False
            sell_num += 1
            rate_temp = (mean[day] - close[day - 1]) / close[day - 1]
            base = base * (1 + rate_temp)
            base_money = base_money * (1 + rate_temp)
            base_fee = base_fee * (1 + rate_temp) * (1 - sell_fee)
            base_money_fee = base_money_fee * (1 + rate_temp) * (1 - sell_fee)
        elif predict_state >= 0 and holding:
            # Predicted flat/up while holding: keep the position.
            hold_num += 1
            day_factor = 1 + change[day] / 100
            base = base * day_factor
            base_money = base_money * day_factor
            base_fee = base_fee * day_factor
            base_money_fee = base_money_fee * day_factor
        else:
            # Predicted flat/down while not holding: stay out.
            empty_num += 1

        model_line.append(base_money)
        model_line_fee.append(base_money_fee)
        once_end_time = time.time()
        logging.info('总次数:' + str(total_rounds) + ',当前第' + str(i + 1) +
                     '次循环用时:' + str(once_end_time - once_start_time) + '秒')

    model_line = np.array(model_line)
    model_line_fee = np.array(model_line_fee)
    predict_result = np.array(predict_result)
    actual_result = np.array(actual_result)

    logging.info('预测结果序列' + str(predict_result))
    logging.info('实际结果序列' + str(actual_result))
    # A class that never occurred yields NaN instead of aborting the report
    # with ZeroDivisionError after all the training work is done.
    logging.info(stock + '预测涨正确的比例' +
                 str(_hit_rate_percent(up_num, actual_up_num)) + '%')
    logging.info(stock + '预测跌正确的比例' +
                 str(_hit_rate_percent(down_num, actual_down_num)) + '%')
    logging.info(stock + '预测平正确的比例' +
                 str(_hit_rate_percent(medium_num, actual_medium_num)) + '%')
    logging.info(stock + '操作涨正确的比例' +
                 str(_hit_rate_percent(predict_up_num_2, actual_up_num_2)) +
                 '%')
    logging.info(stock + '操作跌正确的比例' +
                 str(_hit_rate_percent(predict_down_num_2, actual_down_num_2)) +
                 '%')
    logging.info(stock + '总天数:' + str(total_rounds) + ',买入天数: ' +
                 str(buy_num) + ',卖出天数: ' + str(sell_num) + ',持有天数: ' +
                 str(hold_num) + ',空仓天数: ' + str(empty_num))
    logging.info(stock + '初始金额 : ' + str(close[window_len - 1]) +
                 ' , 实际最终金额 : ' + str(close[-1]))
    logging.info(stock + '最终金额: ' + str(base_money) + ' , 最终金额(含交易费): ' +
                 str(base_money_fee))
    logging.info(stock + "收益率: " + str(base - 1) + " , 收益率(含交易费): " +
                 str(base_fee - 1))
    program_end_time = time.time()
    logging.info('程序总用时:' +
                 str((program_end_time - program_start_time) / 60) + '分钟')

    # Plot the real price against both simulated balance curves.
    fig = plt.figure()
    plt.plot(close[window_len - 1:], color='green', label='Real Stock Price')
    plt.plot(model_line, color='red', label='Predicted Without Fee')
    plt.plot(model_line_fee, color='yellow', label='Predicted With Fee')
    plt.title(label='Stock Prediction')
    plt.xlabel(xlabel='Time')
    plt.ylabel(ylabel='Stock Price')
    plt.legend(loc='upper left')
    # Save before show(): closing an interactive window can tear down the
    # canvas. os.path.join replaces the hard-coded Windows separator, and the
    # directory is created if missing.
    os.makedirs("pictures", exist_ok=True)
    fig.savefig(os.path.join("pictures", "window_" + stock + ".png"))
    plt.show()


def window_predict(stock, window_len, time_steps, flat_num, close, change,
                   mean, data):
    """
    Repeatedly train and predict over a sliding window.

    For every position of the window a new model is trained on the window and
    used to predict the next day; a simple buy/sell/hold simulation is driven
    by the predictions. Statistics go to the console and to
    ``logs/<stock>.txt``; the curves are plotted and saved to
    ``pictures/window_<stock>.png``.

    :param stock: stock identifier
    :param window_len: window length
    :param time_steps: sample length
    :param flat_num: threshold separating up / flat / down
    :param close: closing prices (array with ``.shape``)
    :param change: percentage price changes
    :param mean: mean prices
    :param data: sample data
    :return: None
    """
    # Logging setup.
    logs_dir = os.path.join(os.path.curdir, "logs")
    os.makedirs(logs_dir, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel('DEBUG')
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s",
                                  '%Y/%m/%d %H:%M:%S')
    console_handler = logging.StreamHandler()  # console output
    console_handler.setFormatter(formatter)
    console_handler.setLevel('INFO')
    file_handler = logging.FileHandler(os.path.join(logs_dir, stock + '.txt'),
                                       encoding='utf-8')  # file output
    file_handler.setFormatter(formatter)
    file_handler.setLevel('INFO')
    logger.addHandler(console_handler)
    logger.addHandler(file_handler)

    try:
        _run_window_backtest(stock, window_len, time_steps, flat_num, close,
                             change, mean, data)
    finally:
        # Detach the handlers again. Left on the root logger they pile up with
        # every call, duplicating lines and writing one stock's log into the
        # files of all previously processed stocks.
        for handler in (console_handler, file_handler):
            logger.removeHandler(handler)
            handler.close()
def main():
    """Predict per availability zone and merge the results into a CSV file.

    Command line: ``<instanceType> <productDescription> <id> <region> <1|2>``,
    where the last argument selects a test run (1) or an actual run (2).

    For every selected zone the stored model is loaded and either evaluated
    on test data (run 1, which also prints error figures and plots) or used
    to predict future values (run 2). The summed prediction per zone is
    collected and written to
    ``predictions/<instanceType>_<product>_<id>.csv``. If that file already
    holds rows, the new columns are appended next to them. A zone that fails
    is skipped.

    The script always terminates through ``sys.exit(0)``; usage errors also
    exit with status 0 (status kept from the original script).
    """
    if len(sys.argv) != 6:
        print(
            "Five Arguments needed! How to: python3 predict_ml_model.py <instanceType> <productDescription> <id> <region> <test run = 1 or actual run = 2>"
        )
        sys.exit(0)

    version = int(sys.argv[5])
    if version not in (1, 2):
        print("Last argument has to be 1 or 2")
        sys.exit(0)

    instance_type = str(sys.argv[1])
    product_description = str(sys.argv[2])
    image_id = str(sys.argv[3])
    region = str(sys.argv[4])
    rep_product_description = replace_name(product_description)

    # Parent directory of the current working directory.
    path = os.path.normpath(os.getcwd() + os.sep + os.pardir)
    training_file = (path + '/backend/training_data/' + instance_type + '_' +
                     rep_product_description + '_v2.csv')

    gen = GenerateTrainingData(training_file)
    if gen.generate(instance_type, product_description) == 0:
        sys.exit(0)

    df = pd.read_csv(training_file, sep=',')
    all_zones = df['AvailabilityZone'].drop_duplicates().values
    if region == 'worldwide':
        zones = all_zones
    else:
        zones = [s for s in all_zones if region in s]

    file_name = ('predictions/' + instance_type + '_' +
                 rep_product_description + '_' + image_id + '.csv')

    # Earlier predictions, if any. An empty DataFrame stands for "none" and
    # avoids the dtype warning of ``pd.Series([])`` on pandas 1.x.
    df_old = pd.DataFrame()
    if os.path.isfile(file_name):
        try:
            df_old = pd.read_csv(file_name, sep=',')
        except (OSError, ValueError) as exc:
            # Unreadable or malformed file: start from scratch, as before,
            # but say why.
            print('Ignoring previous predictions: %r' % (exc,),
                  file=sys.stderr)

    print(zones)
    predictions = []
    predicted_zones = []
    for zone in zones:
        try:
            prefix = (instance_type + '_' + rep_product_description + '_' +
                      str(zone))
            architecture_name = prefix + '_architecture.json'
            weights_name = prefix + '_weights.h5'

            mlobj = MLModel(weights_name, architecture_name, instance_type,
                            rep_product_description, region)
            model = mlobj.load_model()
            model.compile(optimizer='nadam',
                          loss='mean_squared_error',
                          metrics=['accuracy'])

            # Only the scaler is needed from this call.
            _features, _labels, scaler = mlobj.generate_training_data(
                df, zone, version, 0)

            prediction = 0
            if version == 1:
                # Evaluate on held-out test data.
                test_predictions, test_data = mlobj.predict_testdata(
                    df, scaler, zone)
                actual = test_data[:, 0]
                prediction = sum(test_predictions)
                sum_test = sum(actual)
                print(actual)
                print(test_predictions)
                _mse, _mae, mape_outcome = mlobj.getErrors(
                    test_predictions, actual)
                print(round(mape_outcome, 4), round(sum_test, 4),
                      round(prediction, 4), round(prediction - sum_test, 4))
                plot_all(test_predictions, actual, 250, instance_type,
                         product_description, zone)
            elif version == 2:
                # Predict the future.
                future_predictions = mlobj.predict_future(df, scaler, zone)
                prediction = sum(future_predictions)

            predictions.append(round(prediction, 4))
            predicted_zones.append(zone)
        except Exception as exc:
            # Skip a failing zone but keep the reason visible; Ctrl-C is no
            # longer swallowed as it was by the bare ``except:``.
            print('Skip', str(zone))
            print('Reason: %r' % (exc,), file=sys.stderr)

    new_columns = [pd.Series(predictions), pd.Series(predicted_zones)]
    if len(df_old) == 0:
        df_old = pd.concat(new_columns, axis=1)
    else:
        df_old = pd.concat([df_old] + new_columns, axis=1)
    print(df_old)
    df_old.to_csv(file_name, index=False)
    sys.exit(0)
def main(args=None, **kwargs):
    """
    Main function of the DCMM gatherer.

    The positional ``args`` and the keyword arguments exist because the
    function may be executed from celery. The keyword arguments carry
    already-filled tokens and passwords so that nothing has to be prompted
    for interactively. Recognised keys are ``token_authentication_github``,
    ``password_authentication_jira``, ``nosql_username`` and
    ``nosql_password``; each is used only when the matching command-line
    flag is not set.

    The function fills credentials into a copy of ``gathering_urls``,
    gathers the resources, stores them as transactions, classifies them with
    ``MLModel`` and prints the fraction of predictions equal to the model's
    labels.
    """
    cmdline_arguments = DCMMArguments(args).args

    # Github credentials
    github_username = cmdline_arguments.github_username
    github_token = None
    # Jira credentials
    jira_username = cmdline_arguments.jira_username
    jira_password = None
    # Nosql credentials
    nosql_username = None
    nosql_password = None

    # For every credential: prompt when the command-line flag asks for it,
    # otherwise fall back to a value handed in through kwargs.
    if cmdline_arguments.token_authentication_github:
        github_token = getpass.getpass(
            prompt="Please provide github authorization token "
                   "for accessing API.")
    elif 'token_authentication_github' in kwargs:
        github_token = kwargs['token_authentication_github']

    if cmdline_arguments.password_authentication_jira:
        jira_password = getpass.getpass(
            prompt="Please provide jira password for accessing API.")
    elif 'password_authentication_jira' in kwargs:
        jira_password = kwargs['password_authentication_jira']

    if cmdline_arguments.nosql_username:
        nosql_username = input("Specify username for nosql database:")
    elif 'nosql_username' in kwargs:
        nosql_username = kwargs['nosql_username']

    if cmdline_arguments.nosql_password:
        nosql_password = getpass.getpass(
            prompt="Provide password for nosql database:")
    elif 'nosql_password' in kwargs:
        nosql_password = kwargs['nosql_password']

    # Work on a deep copy so the module-level constant is never modified.
    single_gathering_urls = copy.deepcopy(gathering_urls)

    # Keys are compared with ``==``: identity (``is``) on strings only works
    # while both sides happen to be the very same object.
    # Iterating over a snapshot keeps the in-loop assignment safe.
    for key, url in list(single_gathering_urls.items()):
        new_url = url
        if key == JIRA and USER_AUTH in url[1]:
            # Add user authentication to JIRA gathering url
            auth_template = url[1][USER_AUTH]
            new_url = [
                url[0],
                {
                    USER_AUTH: [
                        auth_template[0].format(username=jira_username),
                        auth_template[1].format(password=jira_password),
                        '',
                    ]
                },
            ]
        elif key == GOOGLE_CALENDAR and JSON_AUTH in url[1]:
            # Add credentials to GC gathering urls
            credentials_path = url[1][JSON_AUTH][0].format(
                file_path=cmdline_arguments.google_credentials_json)
            new_url = [url[0], {JSON_AUTH: [credentials_path, '']}]
        elif key == GITHUB and USER_AUTH in url[1]:
            repository_urls = [
                GITHUB_API + repository
                for repository in cmdline_arguments.repositories
            ]
            # NOTE(review): raises TypeError when no GitHub token was
            # supplied (github_token is still None at this point).
            headers = urllib3.make_headers(
                user_agent="python-urllib3/1.25.11",
                basic_auth=github_username + ':' + github_token)
            new_url = [repository_urls, {USER_AUTH: ['', '', headers]}]
        elif key == TRELLO:
            new_url = [url[0]['http'].format(http_path='')]

        # Update gathering_url when new information added
        single_gathering_urls[key] = new_url

    # Re-key the GitHub entry so that it carries the username.
    single_gathering_urls[GITHUB + github_username] = \
        single_gathering_urls.pop(GITHUB)

    api_communicator = APICommunicator(single_gathering_urls)
    api_communicator.gather_resources()

    # Database Manipulator could be inside API communicator
    db = DatabaseManipulator(single_gathering_urls, nosql_username,
                             nosql_password)
    # NOTE(review): db2 is never used afterwards. It is kept because the
    # constructor may have side effects; confirm and remove if not.
    db2 = DatabaseManipulator(single_gathering_urls, nosql_username,
                              nosql_password)
    api_communicator.create_transactions(db)

    machine_learning_model = MLModel(db.model_cursor, db.get_data())
    # Predict DCMM entities from gathered API input
    prediction = machine_learning_model.predict()
    # todo: disable printing
    api_communicator.update_transactions(
        prediction, machine_learning_model.test_description, db)

    # Fraction of predictions that equal the model's labels.
    mean_value = numpy.mean(prediction == machine_learning_model.labels)
    print(mean_value)
def main():
    """Predict per availability zone and write one CSV line per zone.

    Command line: ``<instanceType> <productDescription> <id> <region> <1|2>``,
    where the last argument selects a test run (1) or an actual run (2).

    Training data is generated into ``training_data_v3.csv``. For every
    selected zone the stored model is loaded and either evaluated on test
    data (run 1) or used to predict future values (run 2). The summed
    prediction is appended as ``<value>,<zone>`` to
    ``predictions/<instanceType>_<product>_<id>.csv``, which is removed
    first so every run starts with a fresh file. A zone that fails is
    skipped.

    The script always terminates through ``sys.exit(0)``; usage errors also
    exit with status 0 (status kept from the original script).
    """
    if len(sys.argv) != 6:
        print(
            "Five Arguments needed! How to: python3 predict_ml_model.py <instanceType> <productDescription> <id> <region> <test run = 1 or actual run = 2>"
        )
        sys.exit(0)

    version = int(sys.argv[5])
    if version not in (1, 2):
        print("Last argument has to be 1 or 2")
        sys.exit(0)

    instance_type = str(sys.argv[1])
    product_description = str(sys.argv[2])
    image_id = str(sys.argv[3])
    region = str(sys.argv[4])

    training_file = 'training_data_v3.csv'
    gen = GenerateTrainingData(training_file)
    if gen.generate(instance_type, product_description) == 0:
        sys.exit(0)

    df = pd.read_csv(training_file, sep=',')
    if region == 'worldwide':
        # NOTE: "worldwide" is restricted to these two zones in this script
        # rather than every zone found in the training data.
        zones = ['ap-northeast-1a', 'eu-west-3a']
    else:
        all_zones = df['AvailabilityZone'].drop_duplicates().values
        zones = [s for s in all_zones if region in s]

    rep_product_description = replace_name(product_description)
    file_name = ('predictions/' + instance_type + '_' +
                 rep_product_description + '_' + image_id + '.csv')

    # Start with a fresh output file.
    try:
        os.remove(file_name)
    except FileNotFoundError:
        pass  # nothing to remove on the first run
    except OSError as exc:
        # Still best-effort, but no longer silent.
        print('Could not remove %s: %r' % (file_name, exc), file=sys.stderr)

    print(zones)
    for zone in zones:
        try:
            prefix = (instance_type + '_' + rep_product_description + '_' +
                      str(zone))
            architecture_name = prefix + '_architecture.json'
            weights_name = prefix + '_weights.h5'

            mlobj = MLModel(weights_name, architecture_name, instance_type,
                            rep_product_description, region)
            model = mlobj.load_model()
            model.compile(optimizer='nadam',
                          loss='mean_squared_error',
                          metrics=['accuracy'])

            # Only the scaler is needed from this call.
            _features, _labels, scaler = mlobj.generate_training_data(
                df, zone, version, 0)

            prediction = 0
            if version == 1:
                # Evaluate on held-out test data.
                test_predictions, test_data = mlobj.predict_testdata(
                    df, scaler, zone)
                prediction = sum(test_predictions)
                sum_test = sum(test_data[:, 0])
                print(prediction, sum_test)
            elif version == 2:
                # Predict the future.
                future_predictions = mlobj.predict_future(df, scaler, zone)
                prediction = sum(future_predictions)
                print(prediction)

            with open(file_name, 'a+') as f:
                f.write("%s,%s\n" % (round(prediction, 4), zone))
        except Exception as exc:
            # Skip a failing zone but keep the reason visible; Ctrl-C is no
            # longer swallowed as it was by the bare ``except:``.
            print('Skip', str(zone))
            print('Reason: %r' % (exc,), file=sys.stderr)

    sys.exit(0)
x_total, y_total = stock_predict.generate_x_and_y_3( time_steps, change_total[boll_num - 1:], ema[boll_num - 1:], H_line[boll_num - 1:], M_line[boll_num - 1:], L_line[boll_num - 1:], turnover_vol_total[boll_num - 1:]) total_days = len(quotes) - boll_num train_days = len(train_quotes) - boll_num x_train = x_total[0:train_days - time_steps] y_train = y_total[0:train_days - time_steps] x_test = x_total[train_days - time_steps:] y_test = y_total[train_days - time_steps:] y_test = np.argmax(y_test, axis=1) - 1 # 构建模型 model = MLModel(input_shape=(x_train.shape[1], x_train.shape[2]), stock_name=stock) # 训练模型 model.train_model(x_train=x_train, y_train=y_train, epoch=10, batch_size=32) # 预测结果 y_predict = model.predict(x_test) y_predict = np.argmax(y_predict, axis=1) - 1 print("y_predict: ", y_predict) correct_num = 0 rough_correct_num = 0 for i in range(y_test.shape[0]): if y_predict[i] == y_test[i]: correct_num += 1 if (y_predict[i] > 0