def pass_data(file_num, alg_choice=None):
    """
    This function takes the data obtained from the selected menu choices
    above, and passes them to their respective algorithms.

    *** Should be phased out after implementing GUI. ***

    Args:
        file_num (String): The option chosen for the number of files selected:
            either 1, multiple, or a test sinusoid.
        alg_choice (String, optional): Used in the case of creating a test
            sinusoid, where the algorithm is chosen prior to calling this
            function. Defaults to None.
    """
    if file_num == "1":
        time, detrended_flux, background = data_process.get_data()

        # Change values in columns to float values for later processing.
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]

        while True:
            alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Morlet Wavelet \n5 - GPS\n6 - All\n0 - Exit Program\n")
            if alg_choice == "0":
                sys.exit()
            alg.selection(time, detrended_flux, alg_choice)
    else:
        time, detrended_flux, background = data_process.get_data()

        # Change values in columns to float values for later processing.
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]

        alg.selection(time, detrended_flux, alg_choice)
def __init__(self, data_path, epochs=210):
    self.epochs = epochs
    self.gen_optimizer = tf.keras.optimizers.Adam(0.0002, 0.5)
    self.disc_optimizer = tf.keras.optimizers.Adam(0.0002, 0.5)
    self.generator = Generator()
    self.discriminator = Discriminator()
    self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    # One-hot condition vectors for the conditional GAN, repeated across the batch.
    self.condition_weekend = np.array([[1, 0]]).repeat(batch_size, axis=0)  # weekend
    self.condition_workday = np.array([[0, 1]]).repeat(batch_size, axis=0)  # workday
    self.monthly_parking_rate = get_data(data_path)
    self.seed = sample_noise(batch_size)  # tf.random.normal([batch_size, 1], 0.5, 0.2)
    self.avg_weekend, self.avg_workday = self.get_average(self.monthly_parking_rate)
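# sample_noise is imported from elsewhere and not shown here. A minimal sketch of
# what it might look like, assuming -- per the commented-out call above -- it draws
# Gaussian noise with mean 0.5 and stddev 0.2; the latent width of 1 is likewise
# an assumption taken from that comment, not a confirmed signature:
import tensorflow as tf

def sample_noise(batch_size, latent_dim=1):
    # One latent vector per sample in the batch.
    return tf.random.normal([batch_size, latent_dim], mean=0.5, stddev=0.2)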
def main():
    data = get_data(max_len=FLAGS.max_len)
    cls_name = FLAGS.classifier
    module_name = ".".join(cls_name.split('.')[:-1])
    cls_name = cls_name.split('.')[-1]
    _module = importlib.import_module(module_name)
    cls = _module.__dict__.get(cls_name)
    model = cls(data=data,
                nb_epoch=FLAGS.nb_epoch,
                max_len=FLAGS.max_len,
                embed_size=FLAGS.embed_size,
                batch_size=FLAGS.batch_size,
                optimizer=FLAGS.optimizer,
                use_pretrained=FLAGS.use_pretrained,
                trainable=FLAGS.trainable,
                is_kfold=True,
                kfold=10,
                is_retrain=True)
    model.model_predict_with_weights(FLAGS.kfold_model_path)
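# Usage note: FLAGS.classifier is expected to be a fully qualified dotted path
# such as "models.cnn.TextCNN" (a hypothetical name for illustration). Everything
# before the last dot is imported as a module, and the final component is looked
# up as an attribute of that module. The same lookup, sketched in isolation:
import importlib

def load_class(dotted_path):
    module_name, _, cls_name = dotted_path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, cls_name)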
import csv

import pandas as pd
import xgboost as xgb

from data_process import get_data

# Load the data.
x_train, y_train, x_val = get_data()

# Train the model; eval_set monitors a held-out slice of the training data.
reg = xgb.XGBRegressor()
reg.fit(x_train, y_train, eval_set=[(x_train[8000:14007], y_train[8000:14007])])

# Prediction.
y_pred = reg.predict(x_val)
test_result = []
for i in range(len(x_val)):
    test_result.append(y_pred[i])
print(test_result)

# Write the result: append a 'speed' column to test.csv and save as result.csv.
result = csv.reader(open('test.csv', 'r'))
result = [i for i in result]
result[0].append('speed')
for i in range(1, len(result)):
    result[i].append(test_result[i - 1])
with open('result.csv', 'w', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerows(result)
SEED = 7
split_ratio = 0.8
SEQ_LENGTH = 256
BATCH_SIZE = 64
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda' if USE_CUDA else 'cpu')
VOCAB_SIZE = 10000
EMBED_DIM = 100
HIDDEN_DIM = 128
OUTPUT_DIM = 2
learning_rate = 1e-4
NUM_EPOCHS = 8
MODEL_PATH = './models/bi_rnn_model.pth'

vocab, train_iterator, valid_iterator, test_iterator = get_data(
    SEQ_LENGTH, SEED, split_ratio, VOCAB_SIZE - 2, BATCH_SIZE, device)


class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim,
                 bidirectional=False):
        super(RNNModel, self).__init__()
        self.bidirectional = bidirectional
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_dim,
                          bidirectional=bidirectional)
dev_file = options.val_data
test_file = options.test_data
domain_file = options.domain_file
domain_test_file = options.domain_test_file

MAX_SEQUENCE_LENGTH = 20
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 300
batch_size = 256
nb_classes = 2

modelFile = options.w2v_model_file  # "../w2v_models/crisis_word_vector.txt"
emb_model = KeyedVectors.load_word2vec_format(modelFile, binary=False)
# emb_model=""

delim = "\t"
data, _, _ = data_process.get_data(train_file, delim)
ul_data, _, _ = data_process.get_data(domain_file, delim)
data.extend(ul_data)
print("Number of inst for vocab: " + str(len(data)))

word_index, tokenizer = data_process.get_tokenizer(data, MAX_NB_WORDS,
                                                   MAX_SEQUENCE_LENGTH)
train_x, train_y, train_le, train_labels, _, _ = data_process.get_dev_data_with_id(
    train_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
dev_x, dev_y, dev_le, dev_labels, _, _ = data_process.get_dev_data_with_id(
    dev_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
test_x, test_y, test_le, test_labels, _, _ = data_process.get_dev_data_with_id(
    test_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
domain_x, domain_y, _, _, _, _ = data_process.get_dev_data_with_id(
    domain_file, tokenizer, MAX_SEQUENCE_LENGTH, delim)
all_start = datetime.datetime.now()  # Program start time.

# Hyperparameter settings.
sample_size = 96    # Number of samples learned at once.
hidden = 100        # Number of neurons in the LSTM layer.
batch_size = 1      # Batch size for the LSTM layer.
time_step = 96      # Time step for the LSTM layer.
learn_set = 1700    # Number of training iterations.
cond_dim = 2        # Dimension of the condition value.
latent_dim = 1      # Dimension of the latent space.
num_run = 1         # Number of runs.
num_gen_once = 1    # Number of sequences generated per run.
LR = 0.001          # Learning rate.

# Data processing.
# Read the dataset from the data folder: one month of parking data, sampled
# every fifteen minutes, i.e. 96 points per day.
data = '../data'
sample_set, index, num_seq = get_data(data, sample_size)
index_list = [i for i in range(num_seq)]

# Data generation.
g_data_ = []
for loop in range(num_run):
    begin = datetime.datetime.now()
    g_data = train(sample_set, index, sample_size, learn_set, batch_size,
                   hidden, time_step, num_seq, num_gen_once, LR, latent_dim,
                   cond_dim)
    g_data_.append(g_data)
    end = datetime.datetime.now()
    print('Elapsed: ', end - begin)

end_end = datetime.datetime.now()
print('Total elapsed:', end_end - all_start)
g_data_ = np.array(g_data_)
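# A minimal sketch of persisting the generated sequences for later evaluation --
# the file name is a hypothetical choice for illustration, not taken from the source:
np.save('generated_parking_rates.npy', g_data_)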
menu = True
while menu:
    menu_selec = input("Select file option: \n1 - Single file \n2 - Multiple Files \n3 - Test Sinusoid \n0 - Exit Program\n\n")

    if menu_selec == "1":
        file_path = input("Choose file for period analysis: ")
        File_Management.read_input_file(file_path)
        pass_data(menu_selec)
        menu = False
    elif menu_selec == "2":
        files = File_Management.open_dir()
        alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Wavelets \n5 - All\n0 - Exit Program\n")
        for path in files:
            File_Management.read_input_file(path)
            pass_data(menu_selec, alg_choice)
    elif menu_selec == "3":
        data_process.create_sin()
        alg_choice = input("Select analysis method: \n1 - Time Series \n2 - Lomb-Scargle \n3 - Autocorrelation \n4 - Wavelets \n5 - All\n0 - Exit Program\n")
        time, detrended_flux, background = data_process.get_data()
        alg.selection(time, detrended_flux, alg_choice)
    elif menu_selec == "0":
        sys.exit()
    else:
        print("This is not a valid selection.")
def resolution(self, url):
    c = super(GetProduct, self).parse(url)
    html = c[0]
    status = c[1]
    ProductCheck = tool.tools.ProductCheck(url)
    if ProductCheck == 0:
        if status == 200:
            tree = etree.HTML(html)
            pid = tool.tools.get_id(url)[0]
            cid = tool.tools.get_id(url)[1]
            title = tree.xpath("//div[@class='goodsd-right col-sm-5']//h4/text()")
            title = "".join(title).strip()
            price = tool.tools.get_price(pid)[0]
            orig_price = tool.tools.get_price(pid)[1]
            description = tree.xpath(
                "//div[@class='goodsd-right col-sm-5']//div[@class='kv']/div//text()")
            description = [x.strip() for x in description if x != '\n ']
            description = [x for x in description if x != '']
            description = "\n".join(description)

            # Strip class/valign/colspan attributes from the size & fit table.
            size_fits = tree.xpath("//table[@class='kv']")[0]
            del size_fits.attrib['class']
            for size_fit in size_fits:
                del size_fit.attrib['class']
                for i in size_fit:  # keys(1)
                    del i.attrib['class']
                    # Set before deleting so the delete never raises KeyError.
                    i.set('valign', '321')
                    del i.attrib['valign']
                    i.set('colspan', '321')
                    del i.attrib['colspan']
                # print(size_fit)
                # list_size.append(size_fit)
            size_fits = etree.tostring(size_fits)
            size_fits = size_fits.decode().replace('\n', '')

            sku = tree.xpath("//div[@class='summary']/span[@class='sku']/text()")
            sku = "".join(sku).replace(' ', '').replace('\n', '').replace('SKU:', '')
            review = tree.xpath("//div[@class='comments']/a/span/text()")
            review = "".join(review).strip('\n').replace(' ', '')
            sizes = tool.tools.get_size(pid)
            if review == '':
                review = 0
            img_urls = tree.xpath("//div[@class='swiper-wrapper']/div/img/@data-src")
            # img_counts = tree.xpath("//div[@class='vertical-wrap']/img/@data-src")
            color_urls = tree.xpath("//div[@class='opt-color']/a/@href")
            color_imgs = tree.xpath("//div[@class='opt-color']/a/@style")

            # Check whether a product with the same id already exists.
            ProductCheckSku = tool.tools.ProductCheck(sku)
            if ProductCheckSku == 0:
                # Check whether the product has multiple colors.
                this_url = url
                if color_urls:
                    color_urls = color_urls[1:]
                    for color_url in color_urls:
                        color_url = color_url + ''
                        # color_url = str(color_url.encode('UTF-8'))
                        self.son_resolution(color_url, pid)
                data_process.get_data(pid, cid, this_url, title, price,
                                      orig_price, description, size_fits, sku,
                                      review, img_urls, color_urls, color_imgs,
                                      sizes)
    else:
        print("-----Parent-url-repetition-----")
    X = MaxPooling3D((2, 2, 2), strides=(2, 2, 2))(X)
    X = Conv3D(2, (1, 1, 1), strides=(2, 2, 2), name='conv2',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D((1, 2, 2), strides=(2, 2, 2))(X)

    # Output layer.
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    # Create model.
    model = Model(inputs=X_input, outputs=X, name='3Dlipreader')
    return model


if __name__ == '__main__':
    setup()
    print('Gathering data...')
    x_train, y_train, x_test, y_test = get_data(DATASET_PATH, TRAIN_SPLIT,
                                                NUM_FRAMES_PER_TENSOR, 'rgb')
    model = get_model_from_architecture(input_shape=INPUT_DIM, classes=2)
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SZ)
    predictions = model.evaluate(x_test, y_test)
    print("Loss = " + str(predictions[0]))
    print("Test accuracy = " + str(predictions[1]))
    # '体检日期' is the checkup-date column; '血糖' (blood glucose) is the target.
    data['体检日期'] = (pd.to_datetime(data['体检日期']) - parse('2017-10-09')).dt.days
    # data.fillna(data.median(axis=0), inplace=True)
    data.dropna(inplace=True)

    # Min-max scale every feature column except the id and the target.
    scaler_columns = [i for i in data.columns if i != 'id' and i != '血糖']
    scaler = MinMaxScaler()
    data[scaler_columns] = scaler.fit_transform(data[scaler_columns])

    train_feat = data[data.id.isin(train_id)]
    test_feat = data[data.id.isin(test_id)]
    train_feat = train_feat.drop(['id'], axis=1)
    test_feat = test_feat.drop(['id'], axis=1)
    return train_feat, test_feat


# train_feat, test_feat = make_feat(train, test)
train_feat, test_feat = get_data(data_path)
# train_feat['血糖'] = np.log(train_feat['血糖'])
predictors = [f for f in test_feat.columns if f not in ['血糖']]


def evalerror(pred, df):
    label = df.get_label().values.copy()
    score = mean_squared_error(label, pred) * 0.5
    return ('0.5mse', score, False)


print('Starting training...')
params = {
    'learning_rate': 0.01,
    'boosting_type': 'gbdt',
}
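# evalerror above follows LightGBM's custom-metric (feval) signature: it takes the
# raw predictions and the Dataset, and returns (name, value, is_higher_better).
# A minimal sketch of wiring it into training -- the Dataset construction and the
# round count are assumptions for illustration, and a real run would hold out a
# proper validation set instead of evaluating on the training data:
import lightgbm as lgb

lgb_train = lgb.Dataset(train_feat[predictors], label=train_feat['血糖'])
gbm = lgb.train(params, lgb_train, num_boost_round=1000,
                valid_sets=[lgb_train], feval=evalerror)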
def data_op(file_num=None, alg_choice=None):
    """
    This function takes in a file/s and an algorithm, and passes the given
    file data to the chosen algorithm.

    Note: each of these parameters is optional in the event that the user does
    not select a choice from either of their respective ComboBoxes; however,
    if either is left as None this function will exit itself.

    Args:
        file_num (String): the number of files chosen by the user. Either
            single or multiple files, or a test sinusoid.
        alg_choice (String): the user's chosen algorithm.
    """
    # Maps the algorithm choices to numbers for compatibility with algorithms.py.
    alg_dict = {
        'Time Series': '1',
        'Lomb-Scargle': '2',
        'Autocorrelation': '3',
        'Wavelets': '4',
        'GPS': '5',
        'All': '6'
    }

    # Prevents the program from crashing in the event that the user doesn't select properly.
    if file_num is None or file_num == "Select" or alg_choice is None or alg_choice == "Select":
        tk.messagebox.showinfo("Error", "Please select both a file/folder and an algorithm")
    elif file_num == "Single File":
        # Prevents the program from crashing in the event that the user closes
        # the file selection window.
        if not files:
            tk.messagebox.showinfo("Error", "Error: No Files Selected")
            return
        # Also prevents a crash when the selection window returns an empty path.
        elif files[0] == "" or files[0] is None:
            tk.messagebox.showinfo("Error", "Error: No Files Selected")
            return
        else:
            print(files[0])
            File_Management.read_input_file(files[0])
            time, detrended_flux, background = data_process.get_data()
            time = [float(data) for data in time]
            detrended_flux = [float(data) for data in detrended_flux]
            noise = [float(data) for data in background]
            alg_choice = alg_dict[alg_choice]
            alg.selection(time, detrended_flux, alg_choice)
    elif file_num == "Multiple Files":
        # Iterates through the files in the selected folder and passes each one
        # through the chosen algorithm. One potential issue with this is if the
        # user intends to pass files through different algorithms.
        for path in files:
            # Skips bad file types so a folder with mixed contents doesn't crash the program.
            if not (path.endswith('.csv') or path.endswith('.fits')):
                continue
            File_Management.read_input_file(path)
            time, detrended_flux, background = data_process.get_data()
            time = [float(data) for data in time]
            detrended_flux = [float(data) for data in detrended_flux]
            noise = [float(data) for data in background]
            alg_new = alg_dict[alg_choice]
            alg.selection(time, detrended_flux, alg_new)
    # This option is not currently functional when used in sequence with a .csv file.
    elif file_num == "Test Sinusoid":
        data_process.create_sin()
        time, detrended_flux, background = data_process.get_data()
        time = [float(data) for data in time]
        detrended_flux = [float(data) for data in detrended_flux]
        noise = [float(data) for data in background]
        alg_choice = alg_dict[alg_choice]
        alg.selection(time, detrended_flux, alg_choice)
#!/usr/bin/python
import sys

import numpy as np
import sklearn  # Importing the basic libraries required.
from sklearn.preprocessing import MinMaxScaler

sys.path.append("../tools/")
from data_process import get_data

user_data1, user_id1, problem_data1, train_submission1 = get_data()
'''
We now have the following things:

user_data1: the performance of a particular user, with features such as
    level and problems solved. Columns: submission_count, problem_solved,
    contribution, follower_count, max_rating, rating, rank.

user_id1: only the user ids, corresponding row-for-row to user_data1.

problem_data1: the description of a particular problem, i.e. its id (int)
    and its difficulty.

train_submission1: 4 columns (all ints):
    1) user_id
    2) problem_id
    3) attempts_range
    4) difficulty, on a scale of 1 to 14 (both included)
'''
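# A minimal sketch of a first look at the structures described above, assuming
# get_data returns pandas DataFrames (the source does not confirm the container type):
print(user_data1.shape, problem_data1.shape, train_submission1.shape)
print(train_submission1.columns.tolist())  # expect user_id, problem_id, attempts_range, difficulty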