def get_model_input(train_file, test_file):
    # data_transform, Number_of_RNN, Numeric_Column, Categorical_Column,
    # utils (keras.utils) and duplicate_portion are module-level names
    # defined elsewhere in the source file.
    train_numeric_ft, train_categorical_ft, train_target = data_transform(train_file)
    test_numeric_ft, test_categorical_ft, test_target = data_transform(test_file)

    # Reshape the flat per-timestep rows into (samples, timesteps, features).
    train_numeric = train_numeric_ft.values.reshape(-1, Number_of_RNN, len(Numeric_Column))
    # print(train_numeric.shape)
    train_version = train_categorical_ft[[Categorical_Column[0]]].values.reshape(-1, Number_of_RNN, 1)
    train_missing = train_categorical_ft[[Categorical_Column[1]]].values.reshape(-1, Number_of_RNN, 1)
    y_train = train_target.values[::Number_of_RNN]  # one label per sequence

    test_numeric = test_numeric_ft.values.reshape(-1, Number_of_RNN, len(Numeric_Column))
    test_version = test_categorical_ft[[Categorical_Column[0]]].values.reshape(-1, Number_of_RNN, 1)
    test_missing = test_categorical_ft[[Categorical_Column[1]]].values.reshape(-1, Number_of_RNN, 1)
    y_test = test_target.values[::Number_of_RNN]

    y_train = utils.to_categorical(y_train, 2)
    y_test = utils.to_categorical(y_test, 2)

    # Duplicate the label == 1 observations in the training set.
    train_numeric = duplicate_portion(train_numeric)
    train_version = duplicate_portion(train_version)
    train_missing = duplicate_portion(train_missing)
    y_train = duplicate_portion(y_train)

    x_train = [train_numeric, train_version, train_missing]
    x_test = [test_numeric, test_version, test_missing]
    return x_train, y_train, x_test, y_test
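# The duplicate_portion helper is not shown in this snippet. A minimal
# hedged sketch of a positive-class oversampler consistent with how it is
# called above (the same function is applied independently to the feature
# arrays and to the labels, so it must repeat the same rows in every array).
# POSITIVE_MASK and this implementation are assumptions, not the source's.
import numpy as np

POSITIVE_MASK = None  # set once, e.g. (train_target.values[::Number_of_RNN] == 1)

def duplicate_portion(arr, times=1):
    """Append `times` extra copies of the positive-class rows of `arr`."""
    positives = arr[POSITIVE_MASK]
    return np.concatenate([arr] + [positives] * times, axis=0)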
def get_model_input_submission(test_file):
    test_numeric_ft, test_categorical_ft, test_target = data_transform(test_file)
    test_numeric = test_numeric_ft.values.reshape(-1, Number_of_RNN, len(Numeric_Column))
    test_version = test_categorical_ft[[Categorical_Column[0]]].values.reshape(-1, Number_of_RNN, 1)
    test_missing = test_categorical_ft[[Categorical_Column[1]]].values.reshape(-1, Number_of_RNN, 1)
    x_test = [test_numeric, test_version, test_missing]
    return x_test
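# Hedged usage sketch: the three-input list can be fed straight to a trained
# Keras model's predict(); `model` and the file name here are hypothetical.
x_submit = get_model_input_submission("test.csv")
probs = model.predict(x_submit)     # shape (n_samples, 2) after to_categorical
predictions = probs.argmax(axis=1)  # class index per sequence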
import numpy as np
import torch.nn as nn
from torchvision import models
from PIL import Image

import data_transform

use_pretrained = True
net = models.vgg16(pretrained=use_pretrained)
print("vgg16: \n", net)
# Remove the padding from conv5_2, then drop the last conv/ReLU/pool of the
# feature extractor and replace the classifier with a small regression head.
net.features[26] = nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(0, 0))
new_features = nn.Sequential(*list(net.features.children())[:-3])
net.features = new_features
net.classifier = nn.Sequential(
    nn.Linear(in_features=73728, out_features=18, bias=True),
    nn.ReLU(True),
    nn.Linear(in_features=18, out_features=3, bias=True)
    # nn.ReLU(True)
)
print("modified network: \n", net)

image_path = "/home/amsl/ros_catkin_ws/src/save_dataset/dataset/example.jpg"
img = Image.open(image_path)
acc = np.array([0, 0, 1])
transform = data_transform.data_transform(224, (0.5, 0.5, 0.5), (0.25, 0.25, 0.25))
img_transformed, _ = transform(img, acc, phase="train")
inputs = img_transformed.unsqueeze_(0)
outputs = net(inputs)
print("img_transformed.size() = ", img_transformed.size())
print("inputs.size() = ", inputs.size())
print("outputs.size() = ", outputs.size())
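# Why in_features=73728: after the fourth max-pool a 224x224 input yields
# 14x14 feature maps; the re-created features[26] with padding=(0, 0)
# shrinks them to 12x12, and 512 * 12 * 12 = 73728. A hedged sketch that
# derives the value with a dummy forward pass instead of hard-coding it
# (this assumes the older torchvision behavior where the features output is
# flattened directly; newer versions insert an adaptive average pool first):
import torch

with torch.no_grad():
    dummy = torch.zeros(1, 3, 224, 224)
    n_flat = net.features(dummy).view(1, -1).size(1)
print("flattened feature size =", n_flat)  # 73728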
## list
train_rootpath = "/home/amsl/ozaki/airsim_ws/pkgs/airsim_controller/save/train"
val_rootpath = "/home/amsl/ozaki/airsim_ws/pkgs/airsim_controller/save/val"
csv_name = "imu_camera.csv"
train_list = make_datapath_list.make_datapath_list(train_rootpath, csv_name)
val_list = make_datapath_list.make_datapath_list(val_rootpath, csv_name)
## trans param
size = 224  # VGG16 input size
mean = [0.25, 0.25, 0.25]
std = [0.5, 0.5, 0.5]
## dataset
train_dataset = original_dataset.OriginalDataset(
    data_list=train_list,
    transform=data_transform.data_transform(size, mean, std),
    phase="train"
)
val_dataset = original_dataset.OriginalDataset(
    data_list=val_list,
    transform=data_transform.data_transform(size, mean, std),
    phase="val"
)
## dataloader
batch_size = 32
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}
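# dataloaders_dict keyed by phase is the usual PyTorch pattern for running
# train and val inside one loop. A minimal hedged sketch of how it is
# typically consumed; the loss, optimizer, network name `net` and epoch
# count are assumptions, not taken from this snippet.
import torch
import torch.nn as nn
import torch.optim as optim

criterion = nn.MSELoss()  # assumption: regression-style target
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)

for epoch in range(2):  # epoch count is illustrative
    for phase in ["train", "val"]:
        if phase == "train":
            net.train()
        else:
            net.eval()
        for inputs, labels in dataloaders_dict[phase]:
            optimizer.zero_grad()
            with torch.set_grad_enabled(phase == "train"):
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                if phase == "train":
                    loss.backward()
                    optimizer.step()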
from data_transform import data_transform
import pickle
import thulac
import jieba
import numpy as np

jieba.setLogLevel('WARN')

num_words = 40000
maxlen = 400
# original_dataname = "data_valid"
original_dataname = "data_train"

########################################################################################
# Dataset processing
data_transform = data_transform()
# Read the JSON file
data_transform.read_data(path="./data_original/" + original_dataname + ".json")
# Build one-hot labels for the data
data_transform.extract_data(name='accusation')
# data_transform.extraction['accusation']
# print(data_transform.extraction['accusation'])
# [['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['故意伤害'], ['妨害公务', '故意伤害', '盗窃']]
data_transform.creat_label_set(name='accusation')
labels = data_transform.creat_labels(name='accusation')  # number of cases * 202
# print(len(big_labels), len(big_labels[0]), big_labels[0])
np.save('./data_deal/data_model_use/labels/' + original_dataname + '_labels_accusation.npy', labels)  # shape: number of cases * 202
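# Hedged sanity check (an assumption, not part of the source): reload the
# saved one-hot label matrix and confirm its shape.
labels_check = np.load('./data_deal/data_model_use/labels/' + original_dataname + '_labels_accusation.npy')
print(labels_check.shape)        # expected (number of cases, 202)
print(labels_check.sum(axis=1))  # >= 1 per case; multi-label cases sum higher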
# NOTE: the opening of this function was truncated in the source. The name
# get_money_type, the signature, and the call-option branch below are a
# reconstruction (assumption), inferred from the put-option branch that did
# survive and from standard option moneyness conventions.
def get_money_type(row):
    if row['Type'] == 'C':  # assumed call/put flag
        if row['Strike'] < row['underlying_price_x']:
            money_type = 'ITM'
        elif row['Strike'] > row['underlying_price_x']:
            money_type = 'OTM'
        else:
            money_type = 'ATM'
    else:
        if row['Strike'] < row['underlying_price_x']:
            money_type = 'OTM'
        elif row['Strike'] > row['underlying_price_x']:
            money_type = 'ITM'
        else:
            money_type = 'ATM'
    return money_type


startDate = datetime.date.today() - datetime.timedelta(days=1)
endDate = datetime.date.today()
data_transform(startDate, endDate)
project_dir = directory()
option_data = option_data()
tradeHour = datetime.time(16, 0, 0)
delta_T = 1
maturity_T = 30
dateList = option_data.get_perp_date_list()
option_file_df = option_data.get_option_df()
option_file_by_date = option_file_df.groupby('Date')
interest_rate = 0
dividend = 0
day_count = 0
realtime_dir = get_realtime_data()
orderbook = realtime_dir.get_realtime_orderbook(startDate)
realtime_all_mtr_options = realtime_dir.get_tradetime_vol(  # call truncated in the source
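# Hedged usage example of the reconstructed moneyness helper against a
# two-row pandas frame; the column names follow the snippet, while the
# function name get_money_type is the hypothetical one introduced above.
import pandas as pd

sample = pd.DataFrame([
    {'Type': 'C', 'Strike': 90, 'underlying_price_x': 100},  # call below spot -> ITM
    {'Type': 'P', 'Strike': 90, 'underlying_price_x': 100},  # put below spot  -> OTM
])
print(sample.apply(get_money_type, axis=1).tolist())  # ['ITM', 'OTM']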
from data_transform import data_transform
import json
import pickle
import jieba
import numpy as np

jieba.setLogLevel('WARN')

num_words = 20000
maxlen = 400

########################################################################################
# Train dataset processing
data_transform_train = data_transform()
# Read the JSON file
data_train = data_transform_train.read_data(path='./data/data_train.json')

# # Extract the required fields
# data_transform_train.extract_data(name='fact')
# train_fact = data_transform_train.extraction['fact']
#
# # Tokenize and save the raw segmentation; the word-length threshold can be changed later
# train_fact_cut = data_transform_train.cut_texts(texts=train_fact, word_len=1, need_cut=True,
#                                                 texts_cut_savepath='./data_deal/data_cut/train_fact_cut.json')
#
# # Keep only words longer than one character, to drop punctuation and meaningless tokens
# train_fact_cut_new = data_transform_train.cut_texts(texts=train_fact_cut, word_len=2, need_cut=False,
#                                                     texts_cut_savepath='./data_deal/data_cut/train_fact_cut_new.json')

with open('./data_deal/data_cut/train_fact_cut_new.json', 'r') as f:
    train_fact_cut_new = json.load(f)
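# num_words and maxlen are declared but not used in the surviving part of
# this snippet. A hedged sketch of the Keras vectorization step they
# typically feed (an assumption, not shown in the source): index the most
# frequent words and pad every document to maxlen tokens.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

joined_docs = [' '.join(doc) for doc in train_fact_cut_new]
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(joined_docs)
sequences = tokenizer.texts_to_sequences(joined_docs)
x_train = pad_sequences(sequences, maxlen=maxlen)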
from data_transform import data_transform
import json
import pickle
import jieba
import numpy as np

jieba.setLogLevel('WARN')

num_words = 40000
maxlen = 400

########################################################################################
# Big dataset processing
data_transform_big = data_transform()
# Read the JSON file (1,710,857 lines)
data_transform_big.read_data(path='./data/cail2018_big.json')
# Extract the required fields
data_transform_big.extract_data(name='fact')
# big_fact = data_transform_big.extraction['fact']

# Tokenize in chunks of 100,000 documents and pickle the raw segmentation;
# the word-length threshold can be changed later
for i in range(18):
    texts = data_transform_big.extraction['fact'][i * 100000:(i * 100000 + 100000)]
    big_fact_cut = data_transform_big.cut_texts(texts=texts, word_len=1, need_cut=True)
    with open('./data_deal/data_cut/big_fact_cut_%d_%d.pkl' % (i * 100000, i * 100000 + 100000), mode='wb') as f:
        pickle.dump(big_fact_cut, f)
    print('finish big_fact_cut_%d_%d' % (i * 100000, i * 100000 + 100000))

for i in range(18):  # loop body truncated in the source
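# The body of the second loop is truncated in the source. By symmetry with
# the commented-out train pipeline above (a word_len=2, need_cut=False
# pass), it plausibly reloads each pickle and keeps only words longer than
# one character; a hedged sketch of that assumed continuation:
for i in range(18):
    with open('./data_deal/data_cut/big_fact_cut_%d_%d.pkl' % (i * 100000, i * 100000 + 100000), mode='rb') as f:
        big_fact_cut = pickle.load(f)
    big_fact_cut_new = data_transform_big.cut_texts(texts=big_fact_cut, word_len=2, need_cut=False)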
## list
rootpath = "/home/amsl/ros_catkin_ws/src/save_dataset/dataset/imu_camera_velodyne"
csv_name = "imu_color_depth.csv"
train_list = make_datapath_list.make_datapath_list(rootpath, csv_name, phase="train")
val_list = make_datapath_list.make_datapath_list(rootpath, csv_name, phase="val")
## dataset
train_dataset = original_dataset.OriginalDataset(
    data_list=train_list,
    transform=data_transform.data_transform(),
    phase="train"
)
val_dataset = original_dataset.OriginalDataset(
    data_list=val_list,
    transform=data_transform.data_transform(),
    phase="val"
)
## dataloader
batch_size = 32
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}