def getInputData(batchsize):
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batchsize)
    example_batch = tf.reshape(features, [-1])
    item = tf.string_split(example_batch, delimiter="").values.eval()
    return [dict1[alp.decode().lower()] for alp in list(item)]
def write2DB(self):
    from ReadData import ReadData
    # rd = ReadData(self.morning)
    ymd = str(self.date_time.year) + "-" + str(self.date_time.month) + "-" \
        + str(self.date_time.day) + "-" + str(self.date_time.weekday())
    if self.morning == True:
        ymd = "m" + ymd
    file_bak = "../daily/" + ymd + ".txt"
    rd = ReadData(self.morning)
    rd.gbk2utf8("深沪A股.TXT", file_bak)
    f = open(file_bak)
    try:
        con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
        cur = con.cursor()
        Regex1 = re.compile(r"\d")
        while True:
            line = f.readline()
            if not line:
                break
            if not re.search(Regex1, line):
                print line
                continue
            data = rd.splitItem(line)
            for w in data:
                print w,
            print ""
            create_time = self.date_time.strftime('%Y-%m-%d %H:%M:%S')
            sql = "INSERT INTO " + self.table_name + \
                "(idx, name, rise_rate, cur_price, rise_price, buy,\
                sale, total_stock, cur_stock, rise_v, exchange, day_begin,\
                up, down, yesterday, profit, total_price, quantity_ratio,\
                industry, area, amplitude, ave_price, inner_market,\
                outer_market, inner_outer_ratio, buy_volume, sale_volume,\
                currency_capital, market_cap, create_time)\
                VALUES( \'" + data[0] + "\',\'" + data[1] + "\'," + data[2] + "," \
                + data[3] + "," + data[4] + "," + data[5] + "," + data[6] + "," \
                + data[7] + "," + data[8] + "," + data[9] + "," + data[10] + "," \
                + data[11] + "," + data[12] + "," + data[13] + "," + data[14] + "," \
                + data[15] + "," + data[16] + "," + data[17] + ",\'" + data[18] + "\',\'" \
                + data[19] + "\'," + data[20] + "," + data[21] + "," + data[22] + "," \
                + data[23] + "," + data[24] + "," + data[25] + "," + data[26] + "," \
                + data[27] + "," + data[28] + ",\'" + create_time + "\')"
            print sql
            cur.execute(sql)
    except mdb.Error, e:
        print "Error %d: %s" % (e.args[0], e.args[1])
        sys.exit(1)
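# Note added for clarity (not part of the original module): MySQLdb cursors also
# accept parameterized queries via cur.execute(sql, params) with %s placeholders,
# which avoids the hand-built quoting above and the associated SQL-injection risk.
# A minimal sketch, assuming the same 29 data fields plus create_time;
# insert_row_parameterized is a hypothetical helper name:
def insert_row_parameterized(cur, table_name, data, create_time):
    columns = ("idx, name, rise_rate, cur_price, rise_price, buy, sale,"
               " total_stock, cur_stock, rise_v, exchange, day_begin, up, down,"
               " yesterday, profit, total_price, quantity_ratio, industry, area,"
               " amplitude, ave_price, inner_market, outer_market,"
               " inner_outer_ratio, buy_volume, sale_volume, currency_capital,"
               " market_cap, create_time")
    placeholders = ", ".join(["%s"] * 30)  # 29 data fields + create_time
    sql = "INSERT INTO " + table_name + " (" + columns + ") VALUES (" + placeholders + ")"
    cur.execute(sql, list(data[:29]) + [create_time])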
def DenoiseByEMD():
    """
    Denoise the original signal by removing the first n levels of noise.
    """
    globalData = ReadData()
    removeLevel = Settings.denoiseLevel
    filename = './Cashe/globalData_EMD_{}.pickle'.format(removeLevel)

    # If denoising is not used (removeLevel = 0), cache and return the raw data
    if removeLevel < 1:
        globalDataEMD = globalData
        with open(filename, 'wb') as f:
            pickle.dump(globalDataEMD, f)
        return globalDataEMD

    if os.path.exists(filename):
        with open(filename, 'rb') as f:
            globalDataEMD = pickle.load(f)
        return globalDataEMD

    # IMFs
    globalDataImfs = GetImfs(globalData)
    globalDataEMD = {}
    for key in globalData:
        df = globalData[key]
        dfImfs = globalDataImfs[key]
        NLevels = dfImfs.shape[1]
        tsReons = dfImfs[range(Settings.denoiseLevel, NLevels)].sum(axis=1)
        tsReons.name = df.columns[0]
        globalDataEMD[key] = pd.DataFrame(tsReons)

    # Save the data to the filename as a pickle file
    with open(filename, 'wb') as f:
        pickle.dump(globalDataEMD, f)
    return globalDataEMD
def main(args):
    # ---set up paths for training and test data (NUAA face liveness dataset)---
    model_name = args.model
    learning_rate = args.lr
    epoch = args.epoch

    with open(path) as file:
        print("Reading from json ... ")
        data = json.load(file)[model_name]
        accuracy = data['accuracy']
        model_file = data['file']

    print("Reading input from the NUAA dataset ... ")
    readd = ReadData()
    clientdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ClientNormalized/'
    imposterdir = '/content/drive/MyDrive/NormalizedFace_NUAA/ImposterNormalized/'
    client_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_train_normalized.txt'
    imposter_train_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_train_normalized.txt'
    client_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/client_test_normalized.txt'
    imposter_test_normaized_file = '/content/drive/MyDrive/NormalizedFace_NUAA/imposter_test_normalized.txt'

    # ---------------read training and test data----------------
    train_images, train_labels = readd.read_data(clientdir, imposterdir,
                                                 client_train_normaized_file,
                                                 imposter_train_normaized_file)
    test_images, test_labels = readd.read_data(clientdir, imposterdir,
                                               client_test_normaized_file,
                                               imposter_test_normaized_file)

    for i in range(0, 1):
        # --pick one of the following models for face liveness detection---
        if model_name == 'CNN':
            print("Selected CNN")
            cnn = CNNModel()  # simple CNN model for face liveness detection
        else:
            print("Selected Inception")
            cnn = InceptionV4Model()  # Inception model for liveness detection

        if args.resume:
            print("Resuming from the best model")
            model = cnn.load_model(model_file)  # to use the pretrained model
        else:
            print("Starting from scratch by creating a new model")
            model = cnn.create_model(learning_rate)  # create and train a new model

        print("Starting training ...")
        model = cnn.train_model(model, train_images, train_labels, test_images, test_labels,
                                epoch, accuracy, model_file, model_name)
        test_loss, test_acc = cnn.evaluate(model, test_images, test_labels)
        print('iteration = ' + str(i) + ' ---------------------------------------------========')

    print("**************************************Done***************************************")
def __init__(self):
    self.data = ReadData().please_read_data()
    # Cluster lists, so we can assign elements to them
    self.cluster1 = []
    self.cluster2 = []
    self.cluster3 = []
    # Centroid lists, so we can update them
    self.centroid1 = []
    self.centroid2 = []
    self.centroid3 = []
        '''Runs training until cost values converge to within some interval'''
        val = self.linreg(learning_rate, ind, dep)
        old_val = 0
        # Change the threshold below to decide how much convergence is wanted
        while np.absolute(val - old_val) > 1:
            old_val = val
            val = self.linreg(learning_rate, ind, dep)
        self.getTheta()

    def getTheta(self):
        '''Prints out values for the current weight and bias variables'''
        print "Weight Bias"
        print self.weight, self.bias


if __name__ == '__main__':
    # Command line to run this properly:
    #   python NiceLinReg.py data.csv [2,3] 1
    np.random.seed(42)
    loader = ReadData()
    loader.load(sys.argv[1], sys.argv[2], int(sys.argv[3]))

    print "Temp Only"
    tempOnly = NiceLinReg()
    dailyTemp = loader.getInd(0)
    DOJIA = loader.getDep()
    tempOnly.train(.000005, dailyTemp, DOJIA)

    print "\nDiff in Temp and avg highest recorded temp"
    diff = NiceLinReg()
    diffList = loader.diff(0, 1)
    diff.train(0.000000000049, diffList, DOJIA)
def get_HvM(self):
    # Read meta data
    Meta = ReadMeta(neuralfeaturesdir)
    DF_img = Meta.get_DF_img()
    DF_neu = Meta.get_DF_neu()
    times = Meta.get_times()

    # Read neural data
    Data = ReadData(datadir, DF_neu)
    IT, V4 = Data.get_data()

    D = Mapping.get_Neu_trial_V36(IT[1:], [70, 170], times)
    image_indices = np.random.randint(low=0, high=D.shape[1], size=ni)
    D = D[:, image_indices, :]
    D = np.swapaxes(D, 0, 1)

    nf = D.shape[1]
    nt = D.shape[2]

    mu = np.zeros((self.nf, self.ni))
    sd = np.zeros((self.nf, self.ni))
    for f in range(self.nf):
        for i in range(self.ni):
            mu[f, i] = D[i, f, :].mean()
            sd[f, i] = D[i, f, :].std()

    hf = h5py.File(resultdir + 'HvM_stats.h5', 'w')
    hf.create_dataset('mu', data=mu)
    hf.create_dataset('sd', data=sd)
    hf.close()

    # # test synthetic data as HvM
    # nf = 168
    # nt = 46
    # noise_dist = 'poisson'
    # sds = np.logspace(-1, 1, num=int(nf))
    # D = np.zeros((ni, nf, nt))
    # D_mean = np.random.rand(ni, nf)
    # for tr in range(nt):
    #     D[:, :, tr] = D_mean
    #
    # noise1 = np.zeros((ni, nf, int(nt * splitfract)))
    # noise2 = np.zeros((ni, nf, int(nt * splitfract)))
    # for i in range(ni):
    #     if noise_dist == 'normal':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.normal(0, sd + n, size=int(nt * splitfract)) for sd in sds])
    #     elif noise_dist == 'poisson':
    #         n = np.random.rand()
    #         noise1[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #         noise2[i] = np.array([np.random.poisson(sd + n, size=int(nt * splitfract)) for sd in sds])
    #
    # D[:, :, :int(nt * splitfract)] = D[:, :, :int(nt * splitfract)] + noise1
    # D[:, :, int(nt * splitfract):] = D[:, :, int(nt * splitfract):] + noise2

    # to test HvM as synthetic:
    # hf = h5py.File(resultdir + 'D.h5', 'w')
    # hf.create_dataset('D', data=D)
    # hf.close()

    sds = []
    Collinearity = 'HvM'
    noise_dist = 'HvM'

    return D
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat May  5 01:20:32 2018

@author: computer
"""
from ReadData import ReadData
from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
import gc

data = ReadData()
data.X_train.drop(data.X_train.index[:int(1e+8)], inplace=True)
data.y_train.drop(data.y_train.index[:int(1e+8)], inplace=True)

X_train, X_valid, y_train, y_valid = train_test_split(data.X_train, data.y_train,
                                                      test_size=0.1)
cat_features = X_train.columns.get_indexer_for(data.CATEGORICAL_FEATURES)

del data; gc.collect()

train_pool = Pool(X_train, y_train, cat_features=cat_features)
del X_train; del y_train; gc.collect()
            5, 1, stride=2, activation_fn=tf.nn.sigmoid)).tensor


def get_generator_loss(D2):
    '''Loss for the generator.

    Maximize the probability of generating images that the discriminator
    cannot differentiate.

    Returns:
        see the paper
    '''
    return tf.reduce_mean(
        tf.nn.relu(D2) - D2 + tf.log(1.0 + tf.exp(-tf.abs(D2))))


if __name__ == "__main__":
    rd = ReadData()
    input_tensor = tf.placeholder(tf.float32, [FLAGS.batch_size, 32 * 32])

    with pt.defaults_scope(activation_fn=tf.nn.elu,
                           batch_normalize=True,
                           learned_moments_update_rate=0.0003,
                           variance_epsilon=0.001,
                           scale_after_normalization=True):
        with tf.variable_scope("model"):
            D1 = discriminator(input_tensor)  # positive examples
            D_params_num = len(tf.trainable_variables())
            G = generator()

        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
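# Note added for clarity (not from the original file): the expression inside
# get_generator_loss is the numerically stable form of softplus(-D2), i.e.
# -log(sigmoid(D2)), the non-saturating generator loss. A minimal NumPy sketch
# checking the identity relu(x) - x + log(1 + exp(-|x|)) == log(1 + exp(-x)):
import numpy as np

def stable_loss(d2):
    return np.mean(np.maximum(d2, 0.0) - d2 + np.log1p(np.exp(-np.abs(d2))))

def naive_loss(d2):
    return np.mean(np.log1p(np.exp(-d2)))  # softplus(-d2); overflows for very negative d2

d2 = np.array([-3.0, 0.0, 2.5])
assert np.allclose(stable_loss(d2), naive_loss(d2))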
from ReadData import ReadData
import tensorflow as tf
import os.path
import numpy as np

rd = ReadData()

input_dim = 1024  # 32 * 32
hidden_encoder_dim = 400
hidden_decoder_dim = 400
latent_dim = 20
lam = 0


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.001)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0., shape=shape)
    return tf.Variable(initial)


# input
x = tf.placeholder("float", shape=[None, input_dim])  # input_dim = 32 * 32
l2_loss = tf.constant(0.0)  # l2_loss is a number

# hidden W, b
W_encoder_input_hidden = weight_variable([input_dim, hidden_encoder_dim])  # [1024, 400]
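# A minimal sketch (assumed continuation, not from the original file) of how the
# helpers above are typically used to build the encoder's first layer in this
# TF1-style VAE; b_encoder_input_hidden and hidden_encoder are illustrative names:
b_encoder_input_hidden = bias_variable([hidden_encoder_dim])
l2_loss += tf.nn.l2_loss(W_encoder_input_hidden)

# Encoder hidden layer: [batch, 1024] -> [batch, 400]
hidden_encoder = tf.nn.relu(tf.matmul(x, W_encoder_input_hidden) + b_encoder_input_hidden)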
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = RecurrentCNN(no_filters=hidden_size, no_classes=args.no_classes)
elif args.model == 'bilstm_rcnn':
    inputs = [(args.no_comments, 512), (30, 1536)]
    model = BiLSTMRecurrentCNN(hidden_size, no_classes=args.no_classes)

model = model.build(inputs)
model.load_weights(args.weights)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

reader = ReadData(dataset=args.dataset,
                  text_embedding_path=args.text_embedding,
                  video_feature_path=args.video_features,
                  data_shape=inputs,
                  train_val_split=1.)

results = []
labels = []

prog_bar = tqdm(total=int(reader.val_size / args.batch_size))
num_batches = int(reader.val_size / args.batch_size)
i = 0
for x, y in reader.generator_val(batch_size=args.batch_size):
    label = list(y)
    result = list(model.predict(x))
from GA import GA
from ReadData import ReadData
from utils import *

data = ReadData("easy_01_tsp.txt")
params = {'popSize': 100, 'noGen': 100}

ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

bestFitness = 0
bestDist = 0
bestChromoOverallRepres = None

for g in range(ga.getParam()['noGen']):
    ga.oneGenerationElitism()
    # ga.oneGeneration()
    # ga.oneGenerationSteadyState()
    bestChromo = ga.bestChromosome()
    if bestChromo.fitness > bestFitness:
        bestChromoOverallRepres = bestChromo.repres
        bestFitness = bestChromo.fitness
        bestDist = str(dist(bestChromo.repres, ga.getProblParam()))
    print('Best solution in generation ' + str(g) + ' is: ' + str(bestChromo.repres) +
          ' fitness = ' + str(bestChromo.fitness) +
          ' dist: ' + str(dist(bestChromo.repres, ga.getProblParam())))

print("\n")
print('Best solution overall is: ' + str(bestChromoOverallRepres) +
      ' fitness = ' + str(bestFitness) + ' dist: ' + str(bestDist))
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"
"""

# ****************************************************
# This is the entry point of the entire project.
# To understand the code flow, please start reading from here.
# ****************************************************

rd = ReadData()  # Initialize the file reader class

featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, 2))
featureMapProj = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, param.PARTICLE_COUNT, 3))
featureStore = np.zeros((param.FEATURE_SIZE))

world = np.zeros((param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE
                   + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE
                   + param.ANGULAR_VELOCITY_VECTOR_SIZE)

cameraState = np.zeros((stateVectorSize))

# Variables for archiving the position vector, quaternion and features
_x = []
_y = []
_z = []
'''
Created on 5 apr. 2020

@author: Alexandraah
'''
from GA import GA
from ReadData import ReadData
from utils import *
import matplotlib.pyplot as plt

data = ReadData("C:\\@Alexandra\\anul2\\semestrul2\\ai\\lab\\laborator4\\berlin.txt")
params = {'popSize': 500, 'noGen': 1000}

ga = GA(params, data.problParams)
ga.initialisation()
ga.evaluation()

res = []
res1 = []
for i in range(params['noGen']):
    # ga.oneGeneration()
    ga.oneGenerationElitism()
    # ga.oneGenerationSteadyState()
    best = ga.bestChromosome()
    fitnesses = [c.fitness for c in ga.population]
    avgFitness = sum(fitnesses) / len(fitnesses)
    res.append(avgFitness)
    for c in ga.population:
        res1.append(c.fitness)
        print("Fitness: " + str(c.fitness) + "\n")
    print('Generation: ' + str(i) + '\nBest chromosome: ' + str(best.repres) +
          '\nLocal best fitness: ' + str(best.fitness)
__credits__ = []
__license__ = "GNU GPL"
__version__ = "0.0.1"
__maintainer__ = "Agniv Sen"
__email__ = "*****@*****.**"
__status__ = "Prototyping"
"""

# ****************************************************
# This is the entry point of the entire project.
# To understand the code flow, please start reading from here.
# ****************************************************

__name__ = '__main__'

rd = ReadData()  # Initialize the file reader class

featureMap = np.zeros((param.MAX_OBSERVATION, param.MAX_FEATURES, 2))
featureMapProj = np.zeros(
    (param.MAX_OBSERVATION, param.MAX_FEATURES, param.PARTICLE_COUNT, 3))
featureStore = np.zeros((param.FEATURE_SIZE))

world = np.zeros(
    (param.WORLD_SCALE_X, param.WORLD_SCALE_Y, param.WORLD_SCALE_Z))

stateVectorSize = (param.POSITION_VECTOR_SIZE + param.QUATERNION_SIZE
                   + param.TRANSLATIONAL_VELOCITY_VECTOR_SIZE
                   + param.ANGULAR_VELOCITY_VECTOR_SIZE)

cameraState = np.zeros((stateVectorSize))

# Variables for archiving the position vector, quaternion and features
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
############
from ReadData import ReadData
from SHE import SHE

##### Parameters of the experimental device #########
global pixel_width          # pixel width, unit: nm
pixel_width = 320e3
lam_x = 0.124               # X-ray wavelength, unit: nm
dis_s2d = 5300e6            # sample-to-detector distance, unit: nm

############ constant definition ################
pi = math.pi
###############################################

init_data = ReadData("data/1.4/50k-132-1.401286.asc")
px_min = 122
px_max = 362
pz_min = 166
pz_max = 326
cen_px = (px_max + px_min) / 2
cen_pz = (pz_max + pz_min) / 2

I = init_data[pz_min:(pz_max + 1), px_min:(px_max + 1)]
plt.imshow(I[:, 40:200])
plt.axis('off')
plt.colorbar()

# EI = np.sqrt(I)
# I_max = 0.05*np.max(I)
#########################
# x = pixel_width*np.arange(px_min-cen_px, px_max-cen_px+1)
# z = pixel_width*np.arange(pz_min-cen_pz, pz_max-cen_pz+1)
    H = np.dot(np.linalg.pinv(R), P).T
    x_est = np.dot(H, y)
    err = x - x_est
    g = lambda x: np.dot(H, x)
    R = np.dot(err, err.T) / l
    return g, R


if os.name == 'posix':
    trainPath = os.path.abspath('.') + '/Data/train.txt'
    testPath = os.path.abspath('.') + '/Data/test.txt'
elif os.name == 'nt':
    trainPath = os.path.abspath('.') + '\\Data\\train.txt'
    testPath = os.path.abspath('.') + '\\Data\\test.txt'

TrainX, TrainY = ReadData(trainPath, 'train')
# TestX, TestY = ReadData(testPath, 'test')  # two types of test data set

# Hold out the last 500 columns as the test split; take the test slice before
# truncating the training arrays so the two splits do not overlap.
TestX = TrainX[:, -500:]
TestY = TrainY[:, -500:]
TrainX = TrainX[:, 0:-500]
TrainY = TrainY[:, 0:-500]

xTrainDim, TrainLen = np.shape(TrainX)
print(str(xTrainDim) + " " + str(TrainLen))
yTrainDim, TrainLen = np.shape(TrainY)
xTestDim, TestLen = np.shape(TestX)
print(str(yTrainDim) + " " + str(TestLen))

# Kalman filter
    default='nmt_logs')
parser.add_argument(
    '--inference',
    action="store_true",
    help='Whether to run inference or simply train the network')
parser.add_argument('--pretrained_path', help='Path to Pre-trained Weights')

args = parser.parse_args()

assert args.dataset.endswith('csv'), "Dataset file needs to be in CSV format"
assert 0. <= args.train_val_split < 1., "Train-vs-validation split needs to be in [0, 1)"

latent_dim = args.latent_dim

# Reading and preparing the training/validation dataset
reader = ReadData(args.dataset, args.train_val_split, args.language_1, args.language_2)
(X_train, y_train), (X_val, y_val) = reader.prep_data()
train_samples = len(X_train)
val_samples = len(X_val)
num_encoder_tokens = reader.num_encoder_tokens
num_decoder_tokens = reader.num_decoder_tokens

# Loading the embedding matrix
lang1_embedding = Word2Vec.load(args.lang1_embedding)
lang1_tok = Tokenizer()
lang1_tok.fit_on_texts(reader.language_1_text)
encoder_embedding_matrix = np.zeros((num_encoder_tokens, latent_dim))
for word, i in lang1_tok.word_index.items():
    try:
        embedding_vector = lang1_embedding[word]
# ----------
# import original data
# ----------
Datapath = '../../methylation_imputation/data/'
DataSample_full = Datapath + 'intersected_final_chr1_cutoff_20_sample_full.bed'
DataSample_partial = Datapath + 'intersected_final_chr1_cutoff_20_sample_partial.bed'
DataTrain = Datapath + 'intersected_final_chr1_cutoff_20_train.bed'

# Training data: use either one of the following three
# DataTrain = ReadData(DataTrain)
# DataTrain = pd.read_csv('../result/Train_NaN_Meaned', sep='\t')
DataTrain = pd.read_csv('../result/Train_NaN_Meaned_without_2627', sep='\t')

# Sample data
DataSample_full = ReadData(DataSample_full)
DataSample_partial = ReadData(DataSample_partial)

# ----------
# Format the data so it is indexed by start position
# ----------
DataTrain.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_full.set_index('start', drop=False, inplace=True, verify_integrity=True)
DataSample_partial.set_index('start', drop=False, inplace=True, verify_integrity=True)

# ----------
# Read the imputation result
# ----------
Resultpath = '../result/raw/'
from ReadData import ReadData
import codecs

today = datetime.now()
ym = str(today.year) + str(today.month)
ymd = str(today.year) + "-" + str(today.month) + "-" + str(today.day)
table_name = "stock" + ym

try:
    con = mdb.connect('localhost', 'root', '1', 'financedb', charset='utf8')
    cur = con.cursor()
    createTable(cur, ym)

    rd = ReadData()
    rd.gbktoutf8("深沪A股.TXT")
    file_name = ymd + "." + "txt"
    f = open(file_name)
    title = f.readline()  # read the title
    print title
    while True:
        line = f.readline()
        print line
        if not line:  # EOF
            break
        if not re.search(r"\d", line):
            continue
        data = rd.splitItem(line)
        for w in data:
if args.model.endswith('lstm'):
    model = ConvLSTMModel1(hidden_states, classes,
                           attention_size=attention_size,
                           use_attention=args.use_attention)
elif args.model.endswith('deep'):
    model = ConvLSTMModel2(hidden_states, classes,
                           attention_size=attention_size,
                           use_attention=args.use_attention)
else:
    model = ConvModel(classes)

reader = ReadData(args.training_csv, args.embedding, args.classes,
                  batch_size=args.batch_size,
                  no_samples=args.no_samples,
                  train_val_split=args.train_val_split)

print('Reading Validation data.')
val_x, val_y = reader.read_all_val()
if args.model.startswith('cnn'):
    val_x = np.reshape(val_x, (val_x.shape[0], timesteps, embed_size, 1))

with tf.name_scope('Model'):
    prediction = model.model(x)

with tf.name_scope('Loss'):
    crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y)
    cost_func = (tf.reduce_mean(crossent)) / args.batch_size
                  optimizer=optimizer,
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

model.summary()

if args.check_build:
    exit()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}

if args.model == 'sentence_pair':
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=args.dataset,
                      embedding_config=embedding,
                      data_shape=inputs,
                      train_val_split=args.train_val_split,
                      sentence_pair=False)

print('Reading Validation Data ..')
val_x, val_y = reader.read_val()
train_generator = reader.generator()

log_dir = args.model
def mainFunc():
    filepath = 'D:/Code/readfile/data.csv'
    d = ReadData(filepath)
    print(d.data.keys())
    ax1.spines['bottom'].set_color("#5998ff")
    ax1.spines['top'].set_color("#5998ff")
    ax1.spines['left'].set_color("#5998ff")
    ax1.spines['right'].set_color("#5998ff")
    # set the colour of the y-axis tick labels
    ax1.tick_params(axis='y', colors='w')
    plt.gca().yaxis.set_major_locator(mticker.MaxNLocator(prune='upper'))
    ax1.tick_params(axis='x', colors='w')
    plt.ylabel('Stock price and Volume')
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.suptitle("股票代码:{}".format(ts_code), color='w', fontsize=40)
    plt.show()
    print(stock_data)


if __name__ == "__main__":
    # stock code
    ts_code = "000001.SZ"
    # comparison result
    result = {
        'start_time': '20160412',
        'end_time': '20160707',
        'pearson_index': 0.945269566803306
    }
    read_data = ReadData()
    source_data = read_data.mysql_read_date(ts_code, result["start_time"], result["end_time"])
    k_plot = KLinePlot()
    k_plot.plot_k_line(source_data, ts_code)
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from math import sqrt
from ReadData import ReadData
from ExtractSamples import ExtractSamples
from ColorModels import ColorModels
from CrossValidation import CrossValidation
from RunKMeans import RunKMeans
from RunEM_GMM import RunEM_GMM
from RunCommands import RunFCM, RunPCM
from sklearn.metrics import confusion_matrix, roc_curve
from scipy.stats import multivariate_normal

data_train, labels, locations = ReadData()  # load all data
Data, ObjLabels = ExtractSamples(data_train, labels, locations)  # extract objects

plt.close("all")  # close all image plots

Data = Data / 255  # normalize pixel values to be between 0 and 1
Data_HSV, Data_YIQ, Data_HLS = ColorModels(Data, ObjLabels)  # transform RGB to different color spaces

# 80% of the data for training and 20% for testing, per color space
DTrain, DVal, labelsTrain, labelsVal = CrossValidation(Data, ObjLabels, 0.8, 'RGB')
DTrain_HSV, DVal_HSV, labelsTrain_HSV, labelsVal_HSV = CrossValidation(Data_HSV, ObjLabels, 0.8, 'HSV')
DTrain_YIQ, DVal_YIQ, labelsTrain_YIQ, labelsVal_YIQ = CrossValidation(Data_YIQ, ObjLabels, 0.8, 'YIQ')
# logits = drnn(X, W, b)
# predict = tf.nn.softmax(logits)
# Y = predict

# # Cost & Optimizer
# loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=learningrate)
# train_op = optimizer.minimize(loss_op)

# # build accuracy
# correct_pred = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
# accuracy = tf.reduce_mean(correct_pred, tf.float32)

# Initiate global variables
init = tf.global_variables_initializer()

readdata = ReadData()
trainingFiles, testingFiles = readdata.filePathConstructor()
features = readdata.input_pipeline(trainingFiles, batch_size)

# Start training
with tf.Session() as sess:
    # init session
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # loop training steps
    for step in range(training_steps):
        # read input data
        example_batch = tf.reshape(features, [-1])
                similar_value = 0.5 * self.calc_pearson(mul_open, atom_open) \
                    + 0.5 * self.calc_pearson(mul_close, atom_close)
                if result["pearson_index"] < similar_value:
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == "__main__":
    read_data = ReadData()
    # Length of the comparison window, in trading days
    num = 60
    # Take the last num days of the stock we want to match
    source_data = read_data.mysql_read_data("002936.SZ").iloc[-num:]
    if len(source_data) < num:
        num = len(source_data)
    # Pick one stock to compare against and drop its last num days of data
    compare_data = read_data.mysql_read_data("000001.SZ").iloc[:-num]
    # Instantiate the comparison class; the inputs are DataFrames
    compare = CompareSimilarKDynamic(source_data, compare_data, num)
    # Return the window with the highest Pearson correlation
    result = compare.compare_dynamic()
    print(result)
def main():
    readdata = ReadData()
    trainingFiles, testingFiles = readdata.filePathConstructor()
    features = readdata.input_pipeline(trainingFiles, batch_size)

    with tf.Session() as sess:
        # Create the graph, etc.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Character vocabulary: 'a'..'z' -> 1..26, space -> 0, ';' and '-' -> -1
        dict1 = {
            value: (int(key) + 1)
            for key, value in enumerate(list(string.ascii_lowercase))
        }
        dict1[' '] = 0
        dict1[';'] = -1
        dict1['-'] = -1
        vocab_size = len(dict1)

        for i in range(1):
            example_batch = tf.reshape(features, [-1])
            item = tf.string_split(example_batch, delimiter="").values.eval()
            chars = [dict1[alp.decode().lower()] for alp in list(item)]
            data_size = len(chars)
            print('Data has %d characters, %d unique.' % (data_size, vocab_size))

        # # Hyper-parameters
        # hidden_size = 100  # hidden layer's size
        # seq_length = 25    # number of steps to unroll
        # learning_rate = 1e-1

        # inputs = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="inputs")
        # targets = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
        # init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")

        # initializer = tf.random_normal_initializer(stddev=1.0)

        # with tf.variable_scope("RNN") as scope:
        #     hs_t = init_state
        #     ys = []
        #     for t, xs_t in enumerate(tf.split(inputs, seq_length, axis=0)):
        #         if t > 0: scope.reuse_variables()
        #         Wxh = tf.get_variable("Wxh", shape=[vocab_size, hidden_size], dtype=tf.float32, initializer=initializer)
        #         Whh = tf.get_variable("Whh", shape=[hidden_size, hidden_size], dtype=tf.float32, initializer=initializer)
        #         Why = tf.get_variable("Why", shape=[hidden_size, vocab_size], dtype=tf.float32, initializer=initializer)
        #         bh = tf.get_variable("bh", shape=[hidden_size], initializer=initializer)
        #         by = tf.get_variable("by", shape=[vocab_size], initializer=initializer)
        #         hs_t = tf.tanh(tf.matmul(xs_t, Wxh) + tf.matmul(hs_t, Whh) + bh)
        #         ys_t = tf.matmul(hs_t, Why) + by
        #         ys.append(ys_t)
        #     h_prev = hs_t

        # output_softmax = tf.nn.softmax(ys[-1])
        # outputs = tf.concat(ys, axis=0)
        # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))

        # # Optimizer
        # minimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        # grad_and_vars = minimizer.compute_gradients(loss)

        # pred = RNN(chars, weights, biases)
        # # Loss and optimizer
        # # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        # # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
        # # Model evaluation
        # # correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        # # print(example_batch)

        coord.request_stop()
        coord.join(threads)
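# Quick illustration (not part of the original file) of the character mapping
# built above: lowercase letters map to 1..26, space to 0, ';' and '-' to -1.
import string
char_to_id = {ch: i + 1 for i, ch in enumerate(string.ascii_lowercase)}
char_to_id.update({' ': 0, ';': -1, '-': -1})
assert [char_to_id[c] for c in "ab c;"] == [1, 2, 0, 3, -1]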
                self.assignAspect(sentence)
        self.populateAspectWordMat()
        changed = self.calcChiSq()
        self.corpus.aspectSentences.clear()
        for review in self.corpus.allReviews:
            for sentence in review.sentences:
                self.assignAspect(sentence)
        print(self.corpus.aspectKeywords)

    # Saves the object into the given file
    def saveToFile(self, fileName, obj):
        with open(modelDataDir + fileName, 'w') as fp:
            json.dump(obj, fp)


rd = ReadData()
rd.readAspectSeedWords()
rd.readStopWords()
rd.readReviewsFromJson()
rd.removeLessFreqWords()

bootstrapObj = BootStrap(rd)
bootstrapObj.bootStrap()
bootstrapObj.populateLists()
bootstrapObj.saveToFile("wList.json", bootstrapObj.wList)
bootstrapObj.saveToFile("ratingsList.json", bootstrapObj.ratingsList)
bootstrapObj.saveToFile("reviewIdList.json", bootstrapObj.reviewIdList)
bootstrapObj.saveToFile("vocab.json", list(bootstrapObj.corpus.wordFreq.keys()))
bootstrapObj.saveToFile("aspectKeywords.json", bootstrapObj.corpus.aspectKeywords)
                atom_close = self.dynamic(atom_close, "close", index)
                similar_value = 0.5 * self.calc_pearson(mul_open, atom_open) \
                    + 0.5 * self.calc_pearson(mul_close, atom_close)
                if result["pearson_index"] < similar_value:
                    result = {
                        "start_time": temp_compare[0]["trade_date"],
                        "end_time": temp_compare[-1]["trade_date"],
                        "pearson_index": similar_value
                    }
        # cal_finish_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        # end_time = datetime.datetime.strptime(cal_finish_time, "%Y-%m-%d %H:%M:%S")
        # print("Calc Cost: {}".format(str((end_time - start_time).seconds)))
        return result


if __name__ == '__main__':
    read_data = ReadData()
    ts_code_list = read_data.mysql_read_ts_code()[:]
    print("----------------Completed ts_code reading--------------------")
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print("--------Start calculation----------: {0}".format(start_time))
    results_dict = {}
    source_data = read_data.mysql_read_data("000009.SZ").iloc[-60:]
    for ts_code in ts_code_list:
        # print(ts_code)
        compare_data = read_data.mysql_read_data(ts_code).iloc[:-60]
        if len(compare_data) < 60:
            print("Not enough data for this stock")
        else:
            compare = CompareSimilarKDynamic(source_data, compare_data, 60)
            result = compare.compare_dynamic()
            results_dict[ts_code] = result
    # ,test_size=test_size)

    # --------DROP ID column from train and test
    # if ISTRAIN == 1:
    tmpModel, df = trainingAlgo(X_train, y_train, X_test, y_test)
    model = tmpModel
    """
    _, acc, rocScore = models.evaluateModel(X_test, y_test, tmpModel)
    if roc < rocScore:
        roc = rocScore
        model = tmpModel
    print("Accuracy %f, ROC Score %f" % (acc, roc))
    """
    return model, df


# ------Get feature set and create classes
readData = ReadData(".", "HomeCredit", "sa", "Pass@123")
models = Models()
featureSet = readData.getData("dbo.FeatureSet")
featureSet = models.convertCategoricalVaribalesToOneHotEncoding(featureSet)
featureSet = models.addFeatures(featureSet)

train = featureSet[featureSet["TARGET"] != -1]
test = featureSet[featureSet["TARGET"] == -1]

test_ids = test["SK_ID_CURR"]
test.drop(["TARGET", "SK_ID_CURR"], axis=1, inplace=True)
train.drop(["SK_ID_CURR"], axis=1, inplace=True)
train["TARGET"] = train["TARGET"].astype("category")
                  metrics=['accuracy'])
else:
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

model.load_weights(args.weights)
print('Model Loaded from {}.'.format(args.weights))
model.summary()

embedding = {'type': args.embedding_type, 'path': args.embedding_path}

if args.model == 'sentence_pair':
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=True)
else:
    reader = ReadData(path_file=None,
                      embedding_config=embedding,
                      data_shape=inputs,
                      sentence_pair=False)

test_data = pd.read_excel(args.dataset, sheet_name=None)['Sheet1']
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))
print(test_data.columns)

assert len(test_data.columns) > 1, "Labels of Test set not available."
def main(args):
    # check the number of input arguments
    if len(args) != 6:
        print('Should Have Six Input Arguments')
        exit(0)

    # input parameters
    L = int(args[0])
    K = int(args[1])
    training_set_file_name = args[2]
    validation_set_file_name = args[3]
    test_set_file_name = args[4]
    to_print = True if args[5].lower() == 'yes' else False

    path = './' + DATA_DIRECTORY + '/'

    # read data from the training set, validation set, and test set
    rd = ReadData()
    labels, training_set = rd.createDataSet(path + training_set_file_name)
    labels, validation_set = rd.createDataSet(path + validation_set_file_name)
    labels, test_set = rd.createDataSet(path + test_set_file_name)

    # build trees
    dt = DecisionTree()
    info_gain_tree_root = dt.buildDT(training_set, labels.copy(), 'information_gain')
    pruned_info_gain_tree_root = dt.pruneTree(info_gain_tree_root, L, K, validation_set, labels)
    variance_impurity_tree_root = dt.buildDT(training_set, labels.copy(), 'variance_impurity')
    pruned_variance_impurity_tree_root = dt.pruneTree(
        variance_impurity_tree_root, L, K, validation_set, labels)

    print()
    info_accuracy = dt.calAccuracy(test_set, info_gain_tree_root, labels)
    print('Accuracy of decision tree constructed using information gain: %s'
          % info_accuracy)
    variance_accuracy = dt.calAccuracy(test_set, variance_impurity_tree_root, labels)
    print('Accuracy of decision tree constructed using variance impurity: %s'
          % variance_accuracy)
    prune_info_accuracy = dt.calAccuracy(test_set, pruned_info_gain_tree_root, labels)
    print('Accuracy of pruned decision tree constructed using information gain: %s'
          % prune_info_accuracy)
    pruned_variance_accuracy = dt.calAccuracy(
        test_set, pruned_variance_impurity_tree_root, labels)
    print('Accuracy of pruned decision tree constructed using variance impurity: %s'
          % pruned_variance_accuracy)

    if to_print:
        print()
        print('Build Decision Tree By Using Information Gain')
        info_gain_tree_root.printTree()
        print()
        print()
        print('Build Decision Tree By Using Variance Impurity')
        variance_impurity_tree_root.printTree()
        print()
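# Example invocation (hypothetical file names), assuming main() receives
# sys.argv[1:] in the order <L> <K> <training> <validation> <test> <to_print>:
#   python main.py 10 5 training_set.csv validation_set.csv test_set.csv yes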