def main(self):
    """
    Pulls the ticker symbols from a json file, downloads stock data for them from quandl,
    preprocesses the data, builds different supervised machine learning models and
    predicts future stock prices.
    :return: None
    """
    logger.info("------------------Started Stock Price Prediction-----------------")
    # Create instances of all the classes used for stock prediction
    get_data = GetData(api_key=sys.argv[1])
    # Number of dates/data points into the future for which the stock price is to be
    # predicted, as a percentage of the number of historical data points already available
    future_prediction_pcnt = 1
    preprocess_data = PreprocessData(future_prediction_pcnt=future_prediction_pcnt)
    build_models = BuildModels()
    forecast_prices = Predictions()
    # Get data from quandl
    df = get_data.get_stock_data(update_data=False)
    # Preprocess data
    preprocessed_data_dict, original_df_dict = preprocess_data.preprocess_data(
        df, get_data.stock_ticker_list)
    models_list = [
        "Linear Regression", "Decision Tree Regressor", "Random Forest Regressor"
    ]
    # Build models
    models_dict, model_scores_dict = build_models.build_models(
        models_list, preprocessed_data_dict, force_build=False)
    # Predict future stock prices
    forecast_df_dict = forecast_prices.make_predictions(
        models_dict, preprocessed_data_dict, original_df_dict)
    self.plot_forecast(forecast_df_dict, original_df_dict, future_prediction_pcnt)
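# A minimal entry-point sketch for the pipeline above. "StockPricePrediction" is a
# hypothetical name for the class that defines main() and plot_forecast(); the quandl
# API key is assumed to be passed as the first command-line argument.
if __name__ == "__main__":
    StockPricePrediction().main()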
def fetch_data(currenceyAr, exchange, directory, period):
    dataObj = GetData(exchange, directory)
    EWCEWAIGE = pd.read_csv('EWCEWAIGE.csv').set_index('date')
    currenceyPairs = []
    currenceyTsLengths = []
    for currencey in currenceyAr:
        if currencey in ('ewc', 'ewa', 'ige'):
            currenceyPairs.append(EWCEWAIGE[currencey].values)
            currenceyTsLengths.append(len(EWCEWAIGE[currencey].values))
        else:
            ts = dataObj.fetch(currencey)
            ts = dataObj.periodFormatter(currencey, period)
            ts.index = pd.to_datetime(ts.index).dropna()
            currenceyTsLengths.append(len(ts.Close.values))
            currenceyPairs.append(ts.Close.values)
    maxLen = 20000
    for count, curr in enumerate(currenceyPairs):
        if min(currenceyTsLengths) < 10000:
            currenceyPairs[count] = curr[-min(currenceyTsLengths):]
        else:
            currenceyPairs[count] = curr[-maxLen:]
    A = []
    for curr in currenceyPairs:
        A.append(curr)
    A = np.transpose(np.array(A))
    return A
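# A hedged usage sketch for fetch_data(): the exchange, directory and period values
# below are hypothetical examples, while 'ewc', 'ewa' and 'ige' are the three tickers
# the function reads from EWCEWAIGE.csv.
price_matrix = fetch_data(['ewc', 'ewa', 'ige'], 'poloniex', './data/', '1h')
print(price_matrix.shape)  # (n_observations, n_series)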
def main(argv=None):
    config = Config()
    KG_name = config.flie_path.split('/')[-2]
    getdata = GetData()
    config.relation_total, config.entity_total, config.triple_total = getdata.get_data(
        config.flie_path)
    if config.model_name.lower() == 'transe':
        trainModel = TransE(config=config)
    elif config.model_name.lower() == 'transd':
        trainModel = TransD(config=config)
    elif config.model_name.lower() == 'transh':
        trainModel = TransH(config=config)
    elif config.model_name.lower() == 'transr':
        trainModel = TransR(config=config)
    else:
        trainModel = TransE(config=config)
        print('Invalid TransX model name; falling back to the default TransE model')
    with tf.compat.v1.Session() as sess:
        train_op = tf.compat.v1.train.GradientDescentOptimizer(
            trainModel.learning_rate).minimize(trainModel.loss)
        saver = tf.compat.v1.train.Saver()
        sess.run(tf.compat.v1.global_variables_initializer())
        next_batch = getdata.get_next_batch(trainModel.batch_size)
        min_loss = float('inf')  # was 0, which made the best-loss check below never trigger
        global_step = 0
        for epoch in range(trainModel.epochs):
            # random sampling with replacement
            pos_h_batch, pos_r_batch, pos_t_batch, neg_h_batch, neg_r_batch, neg_t_batch = getdata.get_batch(
                trainModel.batch_size)
            # alternatively, draw the data batch by batch in order:
            # pos_h_batch, pos_r_batch, pos_t_batch, neg_h_batch, neg_r_batch, neg_t_batch = next_batch.__next__()
            feed_dict = {
                trainModel.pos_h: pos_h_batch,
                trainModel.pos_t: pos_t_batch,
                trainModel.pos_r: pos_r_batch,
                trainModel.neg_h: neg_h_batch,
                trainModel.neg_t: neg_t_batch,
                trainModel.neg_r: neg_r_batch
            }
            sess.run([trainModel.loss, train_op], feed_dict=feed_dict)
            loss = sess.run(trainModel.loss, feed_dict=feed_dict)
            if loss < min_loss:
                min_loss = loss
                global_step = epoch
            if epoch % 50 == 0:
                print('epoch:', epoch, ',loss:', loss)
        # the directory name "模型保存路径" means "model save path"
        saver_add = './模型保存路径/' + KG_name + '/' + str(
            type(trainModel)).replace("<class 'Models.", '').replace("'>", '') + '/'
        print('Model file saved at', saver_add + 'model.ckpt')
        os.makedirs(saver_add, exist_ok=True)
        saver.save(sess, saver_add + 'model.ckpt')
def detik_popular():
    html_parser = GetData("https://www.detik.com/terpopuler?",
                          "{'tag_from':'wp_cb_mostPopular_more'}").html_parser()
    popular_area = html_parser.find(attrs={'class': 'grid-row list-content'})
    popular_titles = popular_area.findAll(attrs={'class': 'media__title'})
    popular_images = popular_area.find_all(attrs={'class': 'media__image'})
    return render_template('detik_popular.html', images=popular_images)
def run(apk_path, mail_list):
    try:
        logger.info("Starting apk check")
        CheckApp(apk_path).check_app()
        logger.info("Starting report generation")
        start_gunicorn()
        report_path = get_html(mail_list)
        response_filepath = UPLoad().upload_local_report(report_path)
        total_size = GetData().get_base()['apksize']
        version = GetData().get_pkg_info()['versionName']
    except Exception as e:
        logger.error("Runtime exception: {}".format(e))
def __init__(self, exchange, currencey, period, directory, livePreScreen=False):
    self.exchange = exchange
    self.currencey = currencey
    self.period = period
    self.directory = directory
    dataObj = GetData(self.exchange, self.directory)
    self.ts = dataObj.fetch(self.currencey)
    self.ts = dataObj.periodFormatter(self.currencey, period)
    self.ts.index = pd.to_datetime(self.ts.index).dropna()
    self.USDCAD = pd.read_csv('CADUSD.csv').astype(float).dropna()
def __init__(self, **kwargs):
    """
    Constructor for the class.
    """
    self.stock_data_info = GetData.get_stock_data_info()
    self.original_df_dict = {}
    self.preprocessed_data_dict = {}
    self.ticker_symbol_list = []
    self.future_prediction_pcnt = kwargs["future_prediction_pcnt"]
def creat_report(html_path, filename, aab=False):
    '''
    Generates a single report from the /templates/report_template.html template
    and updates rs.json.
    '''
    gd = GetData()
    base_info = gd.get_base()
    pkg_info = gd.get_pkg_info()
    apk_info = gd.make_entries()
    apk_detail = gd.get_html_table()
    if pkg_info['package'] in internalapp:
        if aab:
            appname = internalapp[pkg_info['package']] + '_aab'
        else:
            appname = internalapp[pkg_info['package']]
    else:
        if aab:
            appname = pkg_info['package'] + '_aab'
        else:
            appname = pkg_info['package']
    context = {
        'filename': os.path.basename(filename),
        'package': pkg_info['package'],
        'appname': appname,
        'versionName': pkg_info['versionName'],
        'apksize': base_info['apksize'],
        'minSdkVersion': pkg_info['minSdkVersion'],
        'targetSdkVersion': pkg_info['targetSdkVersion'],
        'versionCode': pkg_info['versionCode'],
        'appinfo': apk_info,
        'appdetail': apk_detail,
        'create_time': time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    logger.info('Generating report: %s' % html_path)
    if aab:
        apks_info = GetData().get_apks_size_info()
        context.update(apks_info)
        with open(html_path, 'w') as f:
            html = render_template("report_template_aab.html", context=context)
            f.write(html)
        del_files(config.tmp_apks_path)
        pkg = context['package'] + '_aab'
    else:
        with open(html_path, 'w') as f:
            html = render_template("report_template.html", context=context)
            f.write(html)
        pkg = context['package']
    logger.info('Report generation finished')
    context.pop('appinfo')
    context.pop('appdetail')
    context.update({'report_path': os.path.join('./', pkg, 'reports', os.path.basename(html_path))})
    return context
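# A hedged usage sketch for creat_report(): the report path and apk filename are
# hypothetical, and the call is assumed to run inside a Flask application context
# because the function uses render_template().
report_context = creat_report('./reports/example_report.html', 'example.apk', aab=False)
print(report_context['report_path'])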
def trainModel(input_path, target_path, grid, model_name='XGBoost', min_cut=300,
               max_cut=500000, cv=3, eval=False):
    if model_name == 'XGBoost':
        model = XGBRegressor(objective='reg:squarederror')
    elif model_name == 'RandomForest':
        model = RandomForestRegressor()
    else:
        print("... Unknown model name ...")
        return 0
    X_train, X_test, y_train, y_test = GetData(input_path, target_path, size=0.10)
    X_train, y_train = Cuts(X_train, y_train, min_cut, max_cut)
    print(".....Training {m} on {t} training samples.".format(m=model_name, t=y_train.size))
    print(".....Applying GridSearchCV on {m} with grid {g}.".format(m=model_name, g=grid))
    model_full0 = GridSearchCV(model, grid, scoring='neg_mean_squared_error',
                               cv=cv, iid=False, verbose=10).fit(X_train, y_train)
    print(".....Best params ...", model_full0.best_params_)
    model_final = model_full0.best_estimator_
    print(".....Saving {m}.".format(m=model_name))
    pickle.dump(model_final, open("model_price_me.dat", "wb"))
    if eval:
        plt.hist2d(model_final.predict(X_test), y_test, 15, cmin=5)
        x = np.arange(min(y_test), max(y_test))
        plt.plot(x, x, color='red')
        plt.xlabel("estimated salary")
        plt.ylabel("true salary")
        plt.axis('scaled')
        print(".....Saving {img}.....".format(img='{model}_2dhist.png'.format(model=model_name)))
        plt.savefig('{model}_2dhist.png'.format(model=model_name))
        print("..... RMSE = ", math.sqrt(mse(model_final.predict(X_test), y_test)))
        print("..... MAE = ", mae(model_final.predict(X_test), y_test))
        print("Done.")
    else:
        print("Done.")
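# A hedged usage sketch for trainModel(): the input/target paths and the
# hyper-parameter grid below are hypothetical examples, not values from the
# original project.
xgb_grid = {'n_estimators': [100, 300], 'max_depth': [3, 6], 'learning_rate': [0.05, 0.1]}
trainModel('features.pkl', 'targets.pkl', xgb_grid, model_name='XGBoost', cv=3, eval=True)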
def kaggle(self, mean, std_dev):
    data_getter = GetData()
    train_x, train_y = data_getter.load_training(
        dataset_modifier=self.dataset_modifier,
        as_image=self.as_image,
        transform=self.transform,
        augment=self.augment)
    test_x = data_getter.load_test(as_image=self.as_image, transform=self.transform)
    train_x = train_x.reshape((-1, 64, 64, 1))
    train_y = train_y.flatten()
    test_x = test_x.reshape((-1, 64, 64, 1))
    print('dataset loaded successfully for kaggle')
    tensor_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean, ), (std_dev, )),
    ])
    trainset = ImageDataset(train_x, train_y, transform=tensor_transform)
    testset = TestImageDataset(test_x, transform=tensor_transform)
    trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size=8, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=8, num_workers=2)
    return trainloader, testloader
def html():
    '''
    Assemble the HTML report.
    '''
    gd = GetData()
    base_info = gd.get_base()
    pkg_info = gd.get_pkg_info()
    apk_info = gd.make_entries()
    apk_detail = gd.get_html_table()
    return render_template(
        "report_template.html",
        appname=pkg_info['package'],
        appversion=pkg_info['versionName'],
        appsize=base_info['apksize'],
        minSdkVersion=pkg_info['minSdkVersion'],
        targetSdkVersion=pkg_info['targetSdkVersion'],
        appinfo=apk_info,
        appdetail=apk_detail,
        reporttime=time.strftime("%Y-%m-%d %H:%M:%S"),
    )
def data():
    # read in config info from json file
    data_config = json.load(open('input_config.json', 'r'))
    previous_data_name = data_config["data_and_directories"]["previous_data_name"]
    main_src_directory = data_config["data_and_directories"]["main_src_directory"]
    data_directory = data_config["data_and_directories"]["main_data_directory"]
    period = data_config["data_information"]["period"]
    assetType = data_config["data_information"]["asset_type"]
    pip = data_config["data_information"]["pip"]
    # if the 'newData' param is False, there must be an accompanying file directory
    if data_config["data_and_directories"]["newData"] == False and previous_data_name == False:
        print('\nyou need to input a directory for the chosen file')
        sys.exit(2)
    # creating a very specific directory name
    temp_dir = ''
    for currency in data_config["data_information"]["currencies"].split(','):
        temp_dir = temp_dir + '_' + tickDir(currency)
    # get data for all specified tickers.
    # NOTE: as is, this will only create a df as long as the shortest time series
    if data_config["data_and_directories"]["newData"] == False:
        full_df = dePickler(data_directory + previous_data_name)
        temp_dir = ('period_' + str(period) + '_pulledAt_' + str(time.time())[:10] +
                    '_' + temp_dir + '_' +
                    str(full_df.index.values[-1]).replace('.', '').replace(':', '') +
                    '_to_' +
                    str(full_df.index.values[0]).replace('.', '').replace(':', '') +
                    '.pickle')
    else:
        dataObj = GetData(
            exchange=data_config["data_information"]["exchange"],
            directory=data_directory,
            period=data_config["data_information"]["period"],
            asset_type=data_config["data_information"]["asset_type"],
            newdata=data_config["data_and_directories"]["newData"])
        full_df = None
        # for each currency, fetch the time series and create a full dataframe
        for currency in data_config["data_information"]["currencies"].split(','):
            data = dataObj.fetch(currency)
            # make the df include separate columns for open, high, low, close and volume
            for label in ['Close', 'Open', 'High', 'Low', 'Volume']:
                if full_df is None:
                    full_df = pd.DataFrame(data[label])
                    full_df.columns = [currency + '_' + label]
                else:
                    full_df[currency + '_' + label] = data[label]
        temp_dir = ('period_' + str(period) + '_pulledAt_' + str(time.time())[:10] +
                    '_' + temp_dir + '_' +
                    str(full_df.index.values[-1]).replace('.', '').replace(':', '') +
                    '_to_' +
                    str(full_df.index.values[0]).replace('.', '').replace(':', '') +
                    '.pickle')
    # create features for all of the data and save
    print('Creating Features')
    mlObj = MLstrategy(data=full_df,
                       currencies=data_config["data_information"]["currencies"].split(','),
                       model_directory=main_src_directory + 'models/',
                       pip=pip)
    # if we're not collecting new data, then there is no need to create new features;
    # otherwise, create them
    if data_config["data_and_directories"]["newData"] == False:
        df = full_df
    else:
        df = mlObj.preprocessIndicators(forecastLen=data_config["data_information"]["forecastLen"])
    pickler(data_directory + 'INDICATORS_ADDED_' + temp_dir, df)
    print(full_df)
    return full_df
import dash_html_components as html
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import pandas as pd
import sys

# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../../')
from getdata import GetData

sys.path.insert(1, './components/PlotGenerator')
from PlotGenerator import scatter_data, get_lsc

getdata = GetData(path='./databases/serverdb.db')
df = getdata.get_dataframe(path='./databases/serverdb.db')

# [Scatterplot 1]
fig1, fig2 = scatter_data(df)

dummy_lsc = [[0, 50, 100, 200, 300, 400, 500, 600],
             [0, 1.5, 2, 4, 7.5, 12.5, 20, 40.6]]
dummy_lsc = pd.DataFrame(dummy_lsc).T
dummy_lsc.columns = ['P', 'S']
lsc = get_lsc(dummy_lsc)

items = [
    dbc.DropdownMenuItem(html.A("Dashboard",
                                id='dash-app',
                                href='/',
                                style={
                                    'color': 'black',
                                    ':hover': {
                                        'color': 'black'
def html():
    '''
    Assemble the HTML report.
    '''
    tuples = ()
    data = read_mem()
    i = []
    j = []
    try:
        for index in range(len(data[0])):
            i.append(data[0][index])
            i.append(float(data[1][index]))
            j.append(i)
            i = []
    except Exception as e:
        logger.error(e)
    tuples = tuples + tuple(j)

    tuples1 = ()
    data1 = read_cpu()
    k = []
    H = []
    try:
        for index in range(len(data1[0])):
            k.append(data1[0][index])
            k.append(float(data1[1][index]))
            H.append(k)
            k = []
    except Exception as e:
        logger.error(e)
    tuples1 = tuples1 + tuple(H)

    tuples2 = ()
    data2 = get_page()
    m = []
    n = []
    try:
        for index in range(len(data2[0])):
            m.append(data2[0][index])
            m.append(float(data2[1][index]))
            n.append(m)
            m = []
    except Exception as e:
        logger.error(e)
    tuples2 = tuples2 + tuple(n)

    tuples3 = ()
    data3 = read_fps()
    m = []
    n = []
    try:
        for index in range(len(data3[0])):
            m.append(data3[0][index])
            m.append(float(data3[1][index]))
            n.append(m)
            m = []
    except Exception as e:
        logger.error(e)
    tuples3 = tuples3 + tuple(n)

    tuples4 = ()
    data4 = read_network()
    m = []
    n = []
    try:
        for index in range(len(data4[0])):
            m.append(data4[0][index])
            m.append(float(data4[1][index]))
            n.append(m)
            m = []
    except Exception as e:
        logger.error(e)
    tuples4 = tuples4 + tuple(n)
    print(tuples4)

    params = json.loads(request.get_data())
    apkpath = params['apk_path']
    device_name = params['device_name']
    gd = GetData()
    gb = GetBasic(apkpath, device_name)
    calculate_coverage = gd.get_calculate_coverage()
    return render_template("report_template.html",
                           appname=gb.get_app_name(),
                           appversion=gb.get_app_version(),
                           appsize=gb.get_app_size(),
                           devicesmodel=gb.get_devices_model(),
                           devicesversion=gb.get_devices_version(),
                           installtime=gd.get_install_time(),
                           coldtime=gd.get_lanuch_time(),
                           alreadycov=calculate_coverage[0],
                           notcov=calculate_coverage[1],
                           crashcount=gd.get_crash_count(),
                           data=tuples, memtime=data[0], meminfo=data[1], memactivity=data[2],
                           data1=tuples1, cputime=data1[0], cpuinfo=data1[1], cpuactivity=data1[2],
                           data2=tuples2, pagetime=data2[0], pageinfo=data2[1], pageactivity=data2[2],
                           data3=tuples3, fpstime=data3[0], fpsinfo=data3[1], fpsactivity=data3[2],
                           data4=tuples4, networktime=data4[0], networkinfo=data4[1], networkactivity=data4[2],
                           reporttime=time.strftime("%Y-%m-%d %H:%M:%S"),
                           runpages=gd.get_run_pages(),
                           runtime=gd.get_runtime(),
                           clickcount=gd.get_click_count(),
                           crashdetail=gd.get_crash_detail(),
                           crashimage=gd.get_crash_image(),
                           login=gd.get_login(),
                           )
class Model(object):
    """
    Builds a simple LSTM model using the `keras` library with a `tensorflow` backend.
    The structure includes an LSTM layer (512 units by default), a dense layer and a
    softmax activation layer.
    """
    gd = GetData()
    gd.get_dataset()

    def __init__(self):
        """
        Default variable initialization method.
        """
        self.char_len = len(self.gd.chars)
        self.maxlen = self.gd.preprocess.__defaults__[0]
        self.model = Sequential()

    def lstm_(self, units=512):
        """
        :param units: positive int, optional
            Dimensionality of the output space
        :return: object
            Returns an object of class `keras.models.Sequential`.
        """
        if not os.path.exists(self.gd.model_dir):
            log.info("LSTM-model subfolder not found.")
            os.mkdir(self.gd.model_dir)
            log.info("'model' subfolder created.")
        if os.path.exists(self.gd.model_dir):
            if os.path.exists(os.path.join(self.gd.model_dir, 'model.h5')):
                log.info("LSTM-model found.")
                self.model = load_model(os.path.join(self.gd.model_dir, 'model.h5'))
                self.model.summary()
                return self.model
            else:
                log.info("Previous state not found. Initializing LSTM-model structure. Stand by ...")
                self.model.add(
                    LSTM(units=units,
                         input_shape=(self.maxlen, self.char_len),
                         dropout=0.2,
                         return_sequences=False))  # set return_sequences to True to stack LSTMs
                # self.model.add(Dropout(0.5))
                # self.model.add(LSTM(units=units, dropout=0.2,
                #                     return_sequences=False))
                self.model.add(Dense(units=self.char_len))
                self.model.add(Activation(activation='softmax'))
                log.info("LSTM-model successfully initialized.")
                self.model.summary()
                return self.model
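# A minimal usage sketch: build (or reload) the LSTM defined by Model.lstm_().
# The unit count below is simply the method's default value.
lstm_model = Model().lstm_(units=512)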
#!/usr/bin/env python
"""
Created on Fri Jun 23

@author : Vijayasai S
"""

from createdictionary import CreateDictionary
from getdata import GetData
from poi import POI

if __name__ == "__main__":
    filename = raw_input("Enter the filename: ")
    time = float(raw_input("Enter the limit-time for the pitstops: "))

    dict = CreateDictionary(filename)
    data = GetData(dict, filename)
    POI(time, data)
class SirIden(mcmc, AlgParam):

    def __init__(self, country_region, **kwargs):
        AlgParam.__init__(self, country_region)
        self.LikelihoodFunc = LikelihoodFunc()
        self.data = GetData(countryRegion=country_region)  # get the data set
        self.time = np.arange(0, self.data.confirmed.shape[0] - self.initT, 1)
        self.observations = self.data._timeInterval_data(self.initT, self.time.size)
        self.init_condition = self.observations[:, 0]
        self.model = SIR(self.init_condition,
                         theta=np.zeros(shape=(2, self.time.size)),
                         time=self.time, N=self.N)
        # infections, and recovered + deaths
        self.observation_infected_removed = np.array(
            [self.observations[0, :],
             self.observations[1, :] + self.observations[2, :]])
        self.time_node = np.arange(0, self.data.confirmed.shape[0] - self.initT, self.deltaT)
        self.time_node[-1] = self.data.confirmed.shape[0] - self.initT
        self.theta_dim = self.model.paramDim * self.time_node.size
        self.theta_shape = (self.model.paramDim, self.time_node.size)
        ##self.observation_deltaT = observation_deltaT
        self.time_4_eval_marginal_likelihood = self.time[
            np.arange(0, self.time.size, self.observation_deltaT, dtype=int)]
        self.time_4_eval_marginal_likelihood[-1] = self.time[-1]
        self.observation_initT = self.observation_infected_removed[:, 0]
        self.identified_param_dim = self.theta_dim + self.observation_initT.size
        # MCMC
        mcmc.__init__(self,
                      _func_prior_pdf=self.log_prior_pdf,
                      _func_prior_gen=self.prior_pdf_rvs,
                      _func_likelihood=self.log_likelihood,
                      _func_model_=self.forward_model,
                      _func_kernel_gen=self.adaptive_kernel_rvs,
                      _func_kernelRatio=self.logkernelRatio,
                      n_MC=self.n_MC,
                      dim=self.identified_param_dim,
                      loglikelihood=True)
        index = np.where(self.observation_initT == 0)
        self.observation_initT[index] = 10.
        self.identified_param_dim = self.theta_dim + self.observation_initT.size

    def run_MCMC(self, **kwargs):
        self.kernel_cov = self.getPriorCov()
        return mcmc.run_MCMC(self, **kwargs)

    def x2theta_x0(self, x):
        theta = x[0:self.theta_dim].reshape(self.theta_shape)
        x0 = x[self.theta_dim:]
        return theta, x0

    def theta_x0_2_theta(self, theta, x0, **kwargs):
        parts = []  # renamed from `list` to avoid shadowing the builtin
        for i in range(theta.shape[0]):
            parts.append(theta[i, :])
        parts.append(x0)
        for key in kwargs.keys():
            parts.append(kwargs[key])  # was kwargs['key'], which would raise a KeyError
        return np.concatenate(parts)

    def prior_pdf(self, x):
        theta, x0 = self.x2theta_x0(x)
        return self.prior_theta_pdf(theta) * self.LikelihoodFunc.eval(x0, self.observation_initT)

    def log_prior_pdf(self, x):
        theta, x0 = self.x2theta_x0(x)
        return np.log(self.prior_theta_pdf(theta)) + np.log(self.LikelihoodFunc.eval(x0, self.observation_initT))

    def prior_theta_pdf(self, theta):
        # log(beta) and log(gamma) follow a uniform distribution
        _prior = 1.
        for i in range(0, self.theta_shape[0]):
            _prior *= float(np.min(self.priorTheta[i, 1] >= theta[i, :]) *
                            np.min(self.priorTheta[i, 0] <= theta[i, :]))
        return _prior

    def prior_init_condition(self, x0):
        prior = 1.
        for i in range(0, x0.size):
            ratio = x0[i] / self.observation_initT[i] - 1.
            prior *= float((ratio >= self.LikelihoodFunc.cenBelief[0]) *
                           (ratio <= self.LikelihoodFunc.cenBelief[1]))
        return prior

    def prior_pdf_rvs(self):
        x0 = self.prior_init_condition_rvs()
        theta = self.prior_theta_rvs()
        return self.theta_x0_2_theta(theta, x0)

    def prior_init_condition_rvs(self):
        return np.random.uniform(
            low=self.observation_initT * (1. + self.LikelihoodFunc.cenBelief[0]),
            high=self.observation_initT * (1. + self.LikelihoodFunc.cenBelief[1]))

    def prior_theta_rvs(self):
        shape = (self.time_node.size, self.model.paramDim)
        return np.random.uniform(low=self.priorTheta[:, 0],
                                 high=self.priorTheta[:, 1],
                                 size=shape).transpose()

    def likelihood(self, y):
        _likelihood = 1.
        y = y.reshape((2, y.size // 2))
        for ti in range(1, self.time.size - 1):
            xobserved = (self.observation_infected_removed[:, ti] -
                         self.observation_infected_removed[:, ti - 1])
            _likelihood *= self.LikelihoodFunc.eval(y[:, ti - 1], xobserved)
        return _likelihood

    # Log-likelihood
    def log_likelihood(self, y, **kwargs):
        if 'observationTime' in kwargs.keys():
            observationTime = kwargs['observationTime']
        else:
            observationTime = self.time_4_eval_marginal_likelihood
        _loglikelihood = 0.
        y = y.reshape((2, y.size // 2))
        for ti in range(1, observationTime.size):
            t = observationTime[ti]
            t1 = observationTime[ti]
            t0 = observationTime[ti - 1]
            xobserved = (self.observation_infected_removed[:, t1] -
                         self.observation_infected_removed[:, t0])
            # xobserved = self.observation_infected_removed[:, t]
            lg = self.LikelihoodFunc.eval_log(y[:, ti - 1], xobserved)
            _loglikelihood += lg
        return _loglikelihood

    # new cases during intervals of observationTime
    def forward_model(self, x, **kwargs):
        if 'observationTime' in kwargs.keys():
            observationTime = kwargs['observationTime']
        else:
            observationTime = self.time_4_eval_marginal_likelihood
        vtheta = x[0:self.theta_dim].reshape(self.theta_shape)
        self.model.theta = self.interpolate_theta(vtheta)
        self.model.init_condition = x[self.theta_dim:self.theta_dim + 2]
        __modelPrediction = self.model.eval()
        t1 = observationTime[1:]
        t0 = observationTime[0:-1]
        # new infection cases, and new recovered + deaths
        incrementModel = __modelPrediction[:, t1] - __modelPrediction[:, t0]
        incrementModel = np.concatenate((incrementModel[0, :], incrementModel[1, :]))
        return incrementModel

    # piecewise linear approximation of beta and gamma
    def interpolate_theta(self, logvtheta):
        vtheta = np.exp(logvtheta)
        ftheta = np.array([np.interp(self.time, self.time_node, vtheta[i, :])
                           for i in range(vtheta.shape[0])])
        return ftheta

    def getPriorCov(self):
        # Sample the prior with N samples to compute the covariance
        N = 10000
        x = np.zeros(shape=(N, self.identified_param_dim))
        for i in range(N):
            x[i, :] = self.prior_pdf_rvs()
        return np.cov(x.transpose())

    def getcorrPriorCov(self):
        _cov = self.getPriorCov()
        for i in range(0, self.time_node.size - 1):
            _cov[i, i + 1] = 0.5 * np.sqrt(_cov[i, i] * _cov[i + 1, i + 1])  # was cov[i, i], an undefined name
            _cov[i + 1, i] = _cov[i, i + 1]
        for i in range(self.time_node.size, self.time_node.size * 2 - 1):
            _cov[i, i + 1] = 0.5 * np.sqrt(_cov[i, i] * _cov[i + 1, i + 1])  # was cov[i, i], an undefined name
            _cov[i + 1, i] = _cov[i, i + 1]
        return _cov

    def adaptive_kernel_std_ratio(self):
        if self.acceptanceRatio > self.targeted_acceptance_ratio * 1.15 and self.currNbRepeatedSample <= 100:
            self.kernel_std_ratio = min(self.kernel_std_ratio * 1.15, 0.1)
        if self.acceptanceRatio < self.targeted_acceptance_ratio / 1.15 or self.currNbRepeatedSample >= 100:
            self.kernel_std_ratio = self.kernel_std_ratio / 1.15

    def adaptive_kernel_rvs(self, x):
        if ((self.mcmcStep > 2000 and self.mcmcStep % 100 == 0) or
                (self.currNbRepeatedSample > 100 and self.currNbRepeatedSample % 20 == 0)):
            self.adaptive_kernel_std_ratio()
        if self.mcmcStep % 500 == 0:
            print('kernel_std_ratio', self.kernel_std_ratio)
        self.kernel = multivariate_normal(mean=np.zeros_like(x), cov=self.kernel_cov)
        _prior_pdf = 0.
        while _prior_pdf == 0.:
            if self.mcmcStep < 1:
                _xN = self.prior_pdf_rvs()
            else:
                _xN = np.sqrt(1 - self.kernel_std_ratio ** 2) * x + self.kernel.rvs() * self.kernel_std_ratio
            _prior_pdf = self.prior_pdf(_xN)
        return _xN

    def logkernelRatio(self, x_0, x_1):
        if self.mcmcStep < 1:
            _a = 0.
        else:
            _a = (self.kernel.logpdf(x_1 - np.sqrt(1 - self.kernel_std_ratio ** 2) * x_0) -
                  self.kernel.logpdf(x_0 - np.sqrt(1 - self.kernel_std_ratio ** 2) * x_1))
        return _a

    def plot(self, x):
        theta, x0 = self.x2theta_x0(x)
        self.model.theta = self.interpolate_theta(theta)
        self.model.init_condition = x0
        self.model.eval()
        plt.plot(self.time + self.initT, self.model.simData[0, :], '-^', label='confirmed cases')
        plt.plot(self.time + self.initT, self.model.simData[1, :], '--*', label='recovered + deaths')
        plt.plot(self.time + self.initT, self.model.simData[0, :] - self.model.simData[1, :], '--s', label='active cases')
        plt.plot(self.data.confirmed, '-', label='data confirmed')
        plt.plot(self.data.deaths + self.data.recovered, '*-', label='data recovered deaths')
        plt.plot(self.data.confirmed - self.data.deaths - self.data.recovered, '--', label='active cases')

    def modelBayesCriterion(self, samples, **kwargs):
        # real likelihood = scaledBC * np.exp(logscale)
        if 'observationTime' in kwargs.keys():
            observationTime = kwargs['observationTime']
        else:
            observationTime = self.time_4_eval_marginal_likelihood
        _BC = np.zeros_like(samples[:, 0])
        for i in range(samples.shape[0]):
            _y = self.forward_model(samples[i, :], observationTime=observationTime)
            _BC[i] = self.log_likelihood(_y, observationTime=observationTime)
        _logscaledConst = np.mean(_BC)
        _scaledBC = 0.
        for i in range(samples.shape[0]):
            _scaledBC += np.exp(_BC[i] - _logscaledConst) / samples.shape[0]
        return _scaledBC, _logscaledConst

    def plotLikelihood(self):
        x = np.arange(self.LikelihoodFunc.cenBelief[1] - 0.2,
                      self.LikelihoodFunc.cenBelief[1] + 0.2, 0.02)
        plt.plot(x, self.LikelihoodFunc.eval(x + 1, 1))
                          [0.5, 4, 7, 12, 15, 18], 200)
    k = 1
    lastnum = lastnum + po
    return pfds, hursts, bins, hfd


if __name__ == "__main__":
    global nof
    global po
    client = OSCClient()
    client.connect(("127.0.0.1", 8000))
    forever = True
    data_getter = GetData()
    data_getter2 = GetData2()
    # concentrated, resting, blinking, normal_pics, trippy_pics = data_getter.get_states(file, file2)
    concentrated = data_getter2.get_states(file)
    resting = data_getter2.get_states(file23)
    pfds_c, hursts_c, bins_c, hfd_c = get_state_features(concentrated)
    pfds_r, hursts_r, bins_r, hfd_r = get_state_features(resting)
    delta_c = np.zeros((4, len(bins_c)))
    beta_c = np.zeros((4, len(bins_c)))
    alpha_c = np.zeros((4, len(bins_c)))
    theta_c = np.zeros((4, len(bins_c)))
    bt_c = np.zeros((4, len(bins_c)))
class Test(object):
    """
    Loads the model and proceeds to predict on a random input sequence of
    length `self.maxlen`.
    """
    gd = GetData()
    gd.get_dataset()
    tr = Train()

    def __init__(self):
        """
        Default variable initialization method.
        """
        self.chars = self.gd.chars
        self.char_indices = self.gd.char_indices
        self.indices_char = self.gd.indices_char
        self.content = self.gd.content
        self.model_dir = self.gd.model_dir
        self.model_ = os.path.join(self.model_dir, 'model.h5')
        self.model_ckpt = os.path.join(self.model_dir, '/tmp/weights.hdf5')
        self.maxlen = self.gd.preprocess.__defaults__[0]
        # single assignment (the original repeated "self.start_index =" twice)
        self.start_index = random.randint(0, len(self.content) - self.maxlen - 1)
        self.X, self.y = self.gd.preprocess()
        self.checkpointer = self.tr.checkpointer

    def load_model(self):
        """
        Loads the model from `self.model_dir`, or the last checkpoint from
        `self.model_ckpt`.

        :return: object
            Returns an object of class `keras.models.Sequential`.
        """
        if os.path.exists(self.model_):
            model = load_model(self.model_)
            log.info("LSTM-model successfully loaded.")
            return model
        elif os.path.exists(self.model_ckpt):
            model = load_model(self.model_ckpt)
            # log.info("Continuing training from the checkpoint.")
            # time.sleep(0.5)
            # model.fit(self.X, self.y,
            #           batch_size=128, epochs=1,
            #           callbacks=[self.checkpointer])
            log.info("LSTM-model from the last checkpoint successfully loaded.")
            return model
        else:
            log.warning("Neither LSTM-model nor checkpoint weights found.")
            log.warning('Consider fitting the model first.')
            sys.exit('Abort.')

    def predict(self, model, script_len=50):
        """
        Predicts on a given input.

        :param model: object, keras.models.Sequential
            Input LSTM-model to perform extrapolation.
        :param script_len: positive int, optional
            The length of the script to produce.
        :return: code: list
            List of produced texts. The number of texts equals the number of
            `diversity` values.
        """
        model_ = model
        log.info('LSTM-model successfully loaded.')
        code = []
        for diversity in [0.2, 0.8, 1.2]:  # leave it be for this instance
            print('\n\n')
            print('Generating script with given diversity: ', diversity)
            generated = ''
            sentence = self.content[self.start_index:self.start_index + self.maxlen]
            generated += sentence
            print('Generating script from starting point of length `self.maxlen`: ', sentence)
            print('\n\n')
            sys.stdout.write(generated)
            for i in range(script_len):
                x = np.zeros((1, self.maxlen, len(self.chars)))
                for t, char in enumerate(sentence):
                    x[0, t, self.char_indices[char]] = 1.  # do not comprehend
                preds = model_.predict(x)[0]
                next_index = self.tr.sample(preds, diversity)
                next_char = self.indices_char[next_index]
                generated += next_char
                sentence = sentence[1:] + next_char
                sys.stdout.write(next_char)
                sys.stdout.flush()
            code.append(sentence)
        return code
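# A hedged usage sketch for Test: load the trained LSTM and generate a short script.
# The script_len value below is an arbitrary example.
tester = Test()
lstm = tester.load_model()
generated_scripts = tester.predict(lstm, script_len=100)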
from getdata import GetData
# create the model
import tensorflow as tf
import numpy as np

if __name__ == "__main__":
    ob = GetData("./data.txt")
    data = ob.load_data(6000)
    print(len(data))
    print(type(data))
    # get the word2index and index2word mappings, the data and the data labels
    word2index, index2word, datas, labels = ob.word2index_index2word(data)
    cleardatas = ob.dropstopswords(datas, "./stopwords.txt")  # get the cleaned data
    # lengthlist = [len(per) for per in cleardatas]
    # maxlength = max(lengthlist)  # length of the longest text
    re_datas, re_labels = ob.padding_datas(datas, word2index, labels)
    # shuffle the data
    index = np.arange(len(re_datas))
    index = np.random.permutation(index)
    Datas = []
    Labels = []
    for perindex in index:
        Datas.append(re_datas[perindex])
        Labels.append(re_labels[perindex])
    Datas = np.array(Datas)
    Labels = np.array(Labels)
    train_datas = []
    for per in Datas:
        temp = list(per)
        temp.append(0)
# CSV format: kanji/pronunciations/meaning/components/similar kanjis/../../../ease level/
kanjis_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQtNCsV4crkpKfJGHa1dQniBbtNs1VrmE3MlhUDo2lT2DghEbO3fJKg5bR2FC_wn83hI0tgl2e1i172/pub?gid=1079852200&output=csv'
kanjisim_url1 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQtNCsV4crkpKfJGHa1dQniBbtNs1VrmE3MlhUDo2lT2DghEbO3fJKg5bR2FC_wn83hI0tgl2e1i172/pub?gid=6359283&output=csv'
kanjisim_url2 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQtNCsV4crkpKfJGHa1dQniBbtNs1VrmE3MlhUDo2lT2DghEbO3fJKg5bR2FC_wn83hI0tgl2e1i172/pub?gid=730163754&output=csv'
# https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=16H15Te4hGrKUx1VEpvexZ94rrFWek1k3J7UL9qVVTG4&exportFormat=csv&gid=1079852200

kanjis_file_name = 'D:/Japanese/jap_anki/dumps/graph_kanjis_details.txt'
kanjissim_file_name = 'D:/Japanese/jap_anki/dumps/graph_kanjis_sim.txt'

print("Fetching data")
fetched = False
while not fetched:
    try:
        data = GetData()
        data.bufferKanjiSimData(kanjisim_url1, kanjisim_url2, kanjissim_file_name)
        data.get(kanjis_url, kanjis_file_name)
        fetched = True
    except Exception as e:
        print("- retry")
        print(e)
        time.sleep(60)

print("Printing similarity")
sdot = Similarity.graph(data)
sdisplay = random.random() < 0.7
sdot.render('D:\Japanese\jap_anki\graphs\similarity', view=sdisplay)

print("Printing composition")
import flask  # needed for flask.Flask below
from flask import request
import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import dash_html_components as html
from dash.exceptions import PreventUpdate
from dash.dash import no_update
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go

# Initialize getdata
from getdata import GetData
getdata = GetData()

# plot generator
from components.PlotGenerator.PlotGenerator import scatter_data, get_lsc

# Get Bootstrap
# from style.bootstrap import Bootstrap
# external_scripts = Bootstrap().getScripts()
# external_stylesheets = Bootstrap().getStylesheet()

# Initialize layouts
from components.Home.Home import home_layout
from components.CalibrationLayout.CalibrationLayout import calibration_layout

# from flaskwebgui import FlaskUI  # get the FlaskUI class

server = flask.Flask(__name__)
# ui = FlaskUI(server, port=2020)
def main():
    data = GetData('poloniex', newdata=True)
    tickers = ['LTC/USDT']  # , 'BTC/USDT']
    # tickers = data.tickers()
    ## tickers = ['OMG/BTC']  # , 'ETH/BTC', 'XMR/BTC', 'ZEC/BTC', 'BTC/USDT']
    periods = ['5m', '15m', '30m', '1h']
    # periods = ['5m', '15m', '30m', '1h']
    maximum_parameters = []
    proportion_test = 0.1
    graph_results = True
    data.fetch('BTC/USDT')  # use if list length error  'ETH/USDT', 'XRP/USDT',
    type_coin = 'USDT'
    for tick in tickers:
        if tick[-len(type_coin):] == type_coin:
            data.fetch(tick)
            temp_results = []
            for numsimul in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
                for period in periods:
                    '''formats the bitcoin and current coin data to the right period'''
                    tick_data = data.periodFormatter(tick, period)
                    startDate = tick_data.index[0]
                    btc_data = data.periodFormatter('BTC/USDT', period, startDate)
                    '''formats the raw data to the proportion of data chosen'''
                    startDate = tick_data.index[int((1 - proportion_test) * len(tick_data))]
                    endDate = tick_data.index[-1]
                    btc_prices = btc_data.loc[startDate:endDate]['Close']
                    tick_prices = tick_data.loc[startDate:endDate]['Close']
                    if len(tick_prices) != len(btc_prices):
                        tick_prices.drop(tick_prices.index[0], inplace=True)
                    if len(tick_prices) == len(btc_prices) or tick[-len(type_coin):] == type_coin:
                        strategy = Strategy(len(tick_prices), numsimul)
                        for count, price in enumerate(tick_prices.values):
                            if type_coin == 'BTC':
                                strategy.tick(price, btc_prices.values[count])
                            elif type_coin == 'USDT':
                                strategy.tick(price)
                            strategy.macd(26, 12, 9, count, len(tick_prices))
                        temp_results.append([
                            tick, period, strategy.profit, strategy.balance,
                            tick_prices, strategy.numtrades,
                            min(strategy.balanceList), strategy
                        ])
                    else:
                        print('length error')
                        break
            optimumParam = None
            for result in temp_results:
                if optimumParam == None:
                    optimumParam = result
                    optimum = result
                elif result[3] > optimumParam[3]:
                    optimumParam = result
                else:
                    pass
            print(optimumParam[0], optimumParam[1],
                  '\nProfit on one coin per trade:', optimumParam[2],
                  '\nBalance on', optimumParam[-1].USDpertrade, 'USD per trade:', optimumParam[3],
                  '\nNumber of simultaneous trades:', optimumParam[-1].numSimul,
                  '\nNumber of trades:', optimumParam[-1].numtrades)
            maximum_parameters.append(optimumParam)
    for param in maximum_parameters:
        plot = Graphics(param[4],
                        bal=param[-1].balanceList,
                        buylist=param[-1].buylist,
                        selllist=param[-1].selllist,
                        MACD=param[-1].MACD,
                        signal=param[-1].signal,
                        EMAfast=param[-1].EMAfast,
                        EMAslow=param[-1].EMAslow)
        plot.MACD_plot(26)
class Train(object):
    """
    Trains the input model on vectorized `X` and `y` inputs.
    Uses RMSprop as an optimizer with a 5e-4 learning rate.
    Saves checkpoint weights after each epoch.
    By default, trains on batches of size 256.
    """
    gd = GetData()
    gd.get_dataset()
    model = Model()

    def __init__(self):
        """
        Default variable initialization method.
        """
        self.X, self.y = self.gd.preprocess()
        self.model = self.model.lstm_()
        self.learning_rate = 5e-4
        self.optimizer = RMSprop(lr=self.learning_rate)
        self.model_dir = self.gd.model_dir
        self.model_ = os.path.join(self.model_dir, 'model.h5')
        self.ckpt_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'weights.hdf5')
        self.content = self.gd.content
        self.maxlen = self.gd.preprocess.__defaults__[0]
        self.checkpointer = ModelCheckpoint(filepath=self.ckpt_dir, verbose=1, save_best_only=False)
        self.earlystopping = EarlyStopping(monitor='loss', min_delta=0.01,
                                           patience=3, verbose=1, mode='auto')

    def fit(self):
        """
        :return: object
            Returns an object of class `keras.models.Sequential`.
        """
        if os.path.exists(self.model_):
            log.info('Fully-trained model found.')
            self.model = load_model(self.model_)
            return self.model
        elif os.path.exists(self.ckpt_dir):
            log.info('LSTM-model checkpoint found.')
            self.model = load_model(self.ckpt_dir)
            self.model.fit(self.X, self.y,
                           batch_size=256, epochs=20,
                           callbacks=[self.checkpointer, self.earlystopping])
            return self.model
        else:
            self.model.compile(loss='categorical_crossentropy', optimizer=self.optimizer)
            log.info("Commencing model fitting. Stand by ...")
            time.sleep(0.5)
            # for i in range(1, 30):
            #     print('Iteration: ', i)
            self.model.fit(self.X, self.y,
                           batch_size=256, epochs=20,
                           callbacks=[self.checkpointer, self.earlystopping])
            log.info("LSTM-model successfully fitted.")
            save_model(self.model, filepath=self.model_)
            log.info("LSTM-model dumped at 'model'.")
            return self.model

    @staticmethod
    def sample(preds, temp=1.0):
        preds = np.asarray(preds).astype('float64')
        preds = np.log(preds) / temp
        preds_exp = np.exp(preds)
        preds = preds_exp / np.sum(preds_exp)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)
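# A minimal usage sketch for Train: fit the LSTM (or resume from a checkpoint)
# and keep the fitted model for later sampling.
trainer = Train()
fitted_model = trainer.fit()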
def idr_rate():
    dt_src = GetData("http://www.floatrates.com/daily/idr.json", "").doc_req()
    json_data = dt_src.json()
    return render_template('idr_rate.html', json_datas=json_data.values())
def main():
    # GPU selection
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = mobilenetv3_small(num_classes=2).to(device)
    pre_path = ""
    # NOTE: the loaded state dict is never assigned or applied to the model here,
    # so as written the pretrained weights have no effect.
    torch.load('./pretrained/mobilenetv3-small-c7eb32fe.pth', map_location=device)
    set_parameter_requires_grad(model.features, feature_extracting=True)

    # key hyper-parameters
    learning_rate = 0.001
    num_epochs = 50
    train_batch_size = 16
    test_batch_size = 16

    # loss and optimizer setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.classifier.parameters(), lr=learning_rate)

    # load the datasets
    print("Loading datasets")
    train_dataset = GetData('./data', 224, 'train')
    test_dataset = GetData('./data', 224, 'test')
    print("Finished loading datasets")
    train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

    # containers for plotting
    epochs = []
    evaloss = []
    acc = []

    # print the model structure
    backbone = summary(model, (3, 224, 224))

    for epoch in range(num_epochs):
        epochs.append(epoch + 1)
        # training phase
        total_step = len(train_loader)
        train_epoch_loss = 0
        model.train()
        for i, (images, labels) in enumerate(train_loader):
            # zero the gradients
            optimizer.zero_grad()
            # move images and labels to the device
            images = images.to(device)
            labels = labels.to(device)
            # forward pass
            output = model(images)
            loss = criterion(output, labels)
            # backward pass and optimization
            loss.backward()
            optimizer.step()
            # accumulate the loss over all steps of the epoch
            train_epoch_loss += loss.item()
            # print intermediate results
            if (i + 1) % 2 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.5f}'.format(
                    epoch + 1, num_epochs, i + 1, total_step, loss.item()))
            if (i + 1) == total_step:
                epoch_eva_loss = train_epoch_loss / total_step
                evaloss.append(epoch_eva_loss)
                print('Epoch_eva loss is : {:.5f}'.format(epoch_eva_loss))
        # test phase
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                images = images.to(device)
                labels = labels.to(device)
                output = model(images)
                _, predicted = torch.max(output.data, 1)
                print(predicted)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            acc.append(100 * (correct / total))
            print('Test Accuracy {} %'.format(100 * (correct / total)))
    # print(model.state_dict())
    torch.save(obj=model.state_dict(), f='./modelv3.pth')
    # plot after training finishes
    plt_image(epochs, evaloss, 'loss', 'Epochs', 'EvaLoss')
    plt_image(epochs, acc, 'ACC', 'Epochs', 'EvaAcc')
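# Assumed entry point for the MobileNetV3 training script above; the original
# snippet does not show how its main() is invoked.
if __name__ == '__main__':
    main()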
def main():
    data = GetData('poloniex', newdata=False)
    tickers = ['OMG/BTC']
    # tickers = data.tickers()
    periods = ['30m']
    # periods = ['5m', '15m', '30m', '1h']
    maximum_parameters = []
    proportion_test = 0.1
    graph_results = True
    # data.fetch('BTC/USDT')  # use if list length error
    for tick in tickers:
        if tick[-3:] == 'BTC':
            data.fetch(tick)
            temp_results = []
            for period in periods:
                '''formats the bitcoin and current coin data to the right period'''
                tick_data = data.periodFormatter(tick, period)
                startDate = tick_data.index[0]
                btc_data = data.periodFormatter('BTC/USDT', period, startDate)
                prices = [close for close in tick_data['Close']]
                '''formats the raw data to the proportion of data chosen'''
                startDate = tick_data.index[int((1 - proportion_test) * len(prices))]
                endDate = tick_data.index[-1]
                btc_prices = btc_data.loc[startDate:endDate]['Close']
                tick_prices = tick_data.loc[startDate:endDate]['Close']
                for MAlong in [10, 15, 30]:
                    for x in [2, 3, 4]:
                        MAshort = int(MAlong / x)
                        if len(tick_prices) == len(btc_prices):
                            strategy = Strategy(len(tick_prices))
                            for count, price in enumerate(tick_prices.values):
                                strategy.tick(price, btc_prices.values[count])
                                strategy.movingaverage(MAlong, MAshort, count)
                            profit, balance = strategy.returnParam()
                            temp_results.append([
                                tick, period, tick_prices, strategy,
                                profit, balance, MAlong, MAshort
                            ])
                        else:
                            print('length error')
                            break
            optimumParam = None
            for result in temp_results:
                if optimumParam == None:
                    optimumParam = result
                    optimum = result
                elif result[5] > optimumParam[5]:
                    optimumParam = result
                else:
                    pass
            print(optimumParam[0], optimumParam[1], optimumParam[4],
                  optimumParam[5], optimumParam[6], optimumParam[7])
            maximum_parameters.append(optimumParam)
    for param in maximum_parameters:
        plot = Graphics(param[2], param[3].MAlong, param[3].MAshort,
                        param[3].buylist, param[3].selllist, param[3].balanceList)
        plot.MA_plot()