def produce():
    with DataIter(
        '10.9.135.235',
        b'midas_ctr_pro',
        filter_str,
        request,
    ) as d:
        for i in d.get_data(batch_size=128):
            MY_QUEUE.put(i)
        MY_QUEUE.put("done")
def produce(filter_str, request, train_mode):
    with DataIter(
        "10.9.75.202",     # HBase host, i.e. the IP address
        b'midas_offline',  # table name; note this is the midas *offline* table
        filter_str,
        request,
        train_mode,
    ) as d:
        for i in d.get_data(batch_size=128, train_mode=train_mode):
            # Consume continuously: each i is one batch. After every yield the
            # generator resumes past the yield and resets data=[], so each
            # item placed on the queue is one full batch of data.
            MY_QUEUE.put(i)
        MY_QUEUE.put("done")  # sentinel marking the end of the stream
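# For context, a minimal sketch of the consumer side of MY_QUEUE, assuming
# MY_QUEUE is a standard queue.Queue and "done" is the end-of-stream sentinel
# the producers above enqueue. train_on_batch is a hypothetical placeholder;
# filter_str / request / train_mode come from the surrounding module.
import queue
import threading

MY_QUEUE = queue.Queue(maxsize=32)  # bounded so the producer cannot run far ahead

def train_on_batch(batch):
    pass  # hypothetical placeholder for the per-batch training step

def consume():
    # Drain batches until the "done" sentinel arrives.
    while True:
        batch = MY_QUEUE.get()
        if batch == "done":
            break
        train_on_batch(batch)

producer = threading.Thread(target=produce, args=(filter_str, request, train_mode))
producer.start()
consume()
producer.join()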
def train_model(remove_features, label_names, train_path, val_path,
                batch_size, seq_len, num_epoch, ctx, load_epoch, name):
    import os
    from test import predict, score, make_submission

    # Every .npy file under train_path is a feature, minus the excluded ones.
    data_names = [
        i[:-4] for i in os.listdir(train_path)
        if i.endswith('.npy') and i[:-4] not in remove_features
    ]
    train_iter = SeqDataIter(train_path, batch_size, data_names=data_names,
                             label_names=label_names, shuffle=True,
                             usampling=True, seq_len=seq_len)
    val_iter = SeqDataIter(val_path, batch_size, data_names=data_names,
                           label_names=label_names, shuffle=False,
                           max_len=batch_size * 1000, seq_len=seq_len)
    sym = make_network(train_iter, seq_len)
    sym = add_loss(sym)
    # exp_dir and test_path are read here as module-level globals.
    model = train(sym, train_iter, val_iter, name=name, load_epoch=load_epoch,
                  batch_size=batch_size, exp_dir=exp_dir)
    test_iter = DataIter(test_path, batch_size, data_names=data_names,
                         label_names=[], shuffle=False)
    score(val_iter, 7, name)
    prediction = predict(test_iter, 7, name)
    make_submission(prediction, name)
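# A hedged usage sketch for train_model. The feature names, paths, and epoch
# counts below are illustrative assumptions, not values from this repo; note
# that exp_dir and test_path must exist as module-level globals because
# train_model reads them without taking them as parameters.
import mxnet as mx

exp_dir = './exp/baseline'   # hypothetical; read as a global inside train_model
test_path = './data/test'    # hypothetical; read as a global inside train_model

train_model(
    remove_features=['id'],  # illustrative: exclude the raw id feature
    label_names=['label'],
    train_path='./data/train',
    val_path='./data/val',
    batch_size=128,
    seq_len=30,
    num_epoch=10,
    ctx=mx.cpu(),
    load_epoch=0,
    name='baseline',
)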
def preprocess_data(remove_features, label_names, train_path, val_path,
                    preprocess_path, original_data_root):
    # Preprocess the train table first, then the test table, reusing the
    # parameters saved under preprocess_path. test_path is a module-level global.
    preprocess_parquet_table(
        os.path.join(original_data_root, 'collabTrain'), preprocess_path,
        cols=parquet.read_table(
            os.path.join(original_data_root, 'collabTrain')).to_pandas().columns)
    preprocess_parquet_table(
        os.path.join(original_data_root, 'collabTest'), test_path,
        cols=parquet.read_table(
            os.path.join(original_data_root, 'collabTest')).to_pandas().columns,
        params_path=preprocess_path)
    data_names = [
        i[:-4] for i in os.listdir(preprocess_path)
        if i.endswith('.npy') and i[:-4] not in remove_features
    ]
    data_iter = DataIter(preprocess_path, 100, data_names=data_names,
                         label_names=label_names, shuffle=True)
    # First million shuffled rows become validation; the rest is training.
    train = data_iter.inx[1000000:]
    val = data_iter.inx[:1000000]
    for k, v in data_iter.data.items():
        # The filename must not start with '/', otherwise os.path.join would
        # discard val_path/train_path entirely.
        np.save(os.path.join(val_path, '%s.npy' % k), v[val])
        np.save(os.path.join(train_path, '%s.npy' % k), v[train])
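# The leading-slash bug fixed above deserves a quick illustration: os.path.join
# discards every component to the left of an absolute path, so a filename that
# starts with '/' silently drops the directory argument.
import os

print(os.path.join('/data/val', '/feat.npy'))  # -> '/feat.npy' (val_path lost)
print(os.path.join('/data/val', 'feat.npy'))   # -> '/data/val/feat.npy'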
        ans[vid_list[i]] = output
    check_file.close()
    return ans


if __name__ == '__main__':
    ctx = mx.cpu()
    start_time = time()
    ans_dict = json.load(open('feature/ans_dict.json'))
    num_category = len(ans_dict)
    net = model.Net1(num_category)
    start_epoch = load_params(net, './', ctx)
    test_img = np.load('feature/test_image.npy')
    print("Total test image:", test_img.shape[0])
    test_q = np.load('feature/test_question.npy')
    print("Total test question:", test_q.shape[0])
    # Test labels are unknown, so pass zeros as placeholders
    # (15 per test image).
    data_test = DataIter(test_img, test_q, np.zeros(test_img.shape[0] * 15))
    ans_idx = predict(net, data_test, ctx)
    print('predict result shape is: ', len(ans_idx))
    predict_time = time()
    output_data(ans_dict, ans_idx)
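# output_data is defined elsewhere in this repo; the following is only a
# hypothetical sketch of the decoding step it plausibly performs, assuming
# ans_dict maps answer strings to class indices (hence the inversion) and one
# prediction is written per line. The submit.txt filename is an assumption.
def output_data_sketch(ans_dict, ans_idx, out_path='submit.txt'):
    idx2ans = {v: k for k, v in ans_dict.items()}  # index -> answer string
    with open(out_path, 'w') as f:
        for idx in ans_idx:
            f.write('%s\n' % idx2ans[int(idx)])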
lr_stepnum = int(np.ceil(lr_stepnum))
dlr_steps = [dlr * i for i in range(1, lr_stepnum + 1)]
print('lr_start:%.1e, lr_min:%.1e, lr_reduce:%.2f, lr_stepnum:%d' % (
    lr_start, lr_min, lr_reduce, lr_stepnum))
# print(dlr_steps)
lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(dlr_steps, lr_reduce)

# param_prefix = 'MDL_PARAM/params5_proxy_nca-8wmargin_20180724_dim_512_bn/person_reid-back'
param_prefix = './'
load_paramidx = 0  # None

# DataBatch test
# data_path_prefix = '/train/trainset/list_clean_28w_20180803'
data_path_prefix = '/train/execute/improve_partial_fc/dataset/list_clean_28w_20180803'
data_iter = DataIter(prefix=data_path_prefix, image_shapes=data_shape,
                     data_nthreads=4)

# simple DataBatch test
# data_batch = mx.random.normal(0, 1.0, shape=data_shape)
# data_label = mx.nd.array(range(64), dtype='int32')
# data_test = mx.io.DataBatch(data=[data_batch], label=[data_label])

data = mx.sym.Variable('data')
part_net = create_net(data, radius)
mxmod = mixModule(symbol=part_net, context=ctx, handle=handle,
                  data_shape=data_shape, proxy_Z=proxy_Z, K=999)
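# To make the schedule concrete: MultiFactorScheduler multiplies the learning
# rate by lr_reduce each time the update counter passes another entry in
# dlr_steps. A small self-contained sketch of that behavior, with illustrative
# values (these numbers are assumptions, not taken from this script):
lr_start_demo, lr_reduce_demo, dlr_demo, stepnum_demo = 1e-2, 0.9, 1000, 5
steps_demo = [dlr_demo * i for i in range(1, stepnum_demo + 1)]  # 1000..5000

def lr_at(num_update):
    # Mirrors the scheduler: apply one lr_reduce factor per step passed.
    passed = sum(1 for s in steps_demo if num_update > s)
    return lr_start_demo * lr_reduce_demo ** passed

for u in (0, 1000, 1001, 2500, 5001):
    print(u, lr_at(u))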
if __name__ == '__main__':
    # Please change the following accordingly
    seed = 2
    # main_path = '/home/ziqizeng/Documents/Study/CSCI599_Deep_Learning/Project/final/data/'
    step = 4
    batch_size = 5
    train_portion = 0.2
    num_workers = 2
    num_channels = 1
    class_name = ['pressure', 'velocity', 'density']
    config = Config(1e-5, 10, seed, 2)
    for i in range(1):
        data_iter = DataIter(class_name[i], shuffle_list_seed=seed)
        file_cnt = len(data_iter)
        print(file_cnt)
        indices = list(range(file_cnt))
        split = int(np.floor(file_cnt * train_portion))
        print(split)
        train_sampler = SubsetRandomSampler(indices[:split])
        test_sampler = SubsetRandomSampler(indices[split:])
        train_data = DataLoader(data_iter, batch_size=batch_size,
                                num_workers=num_workers, pin_memory=False,
                                sampler=train_sampler)
        test_data = DataLoader(data_iter, batch_size=batch_size,
                               num_workers=num_workers, pin_memory=False,
                               sampler=test_sampler)
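        # A hedged sketch of consuming the loaders above, assuming DataIter
        # implements the map-style Dataset protocol (__len__ and __getitem__)
        # that DataLoader and SubsetRandomSampler require, and that
        # __getitem__ returns an (input, target) pair. Note that with
        # train_portion = 0.2, the first 20% of the shuffled file indices feed
        # train_sampler and the remaining 80% feed test_sampler.
        for inputs, targets in train_data:
            # DataLoader collates batch_size items from DataIter.__getitem__
            # into batched tensors along a new leading dimension.
            print(inputs.shape, targets.shape)
            break  # inspect shapes only; remove to iterate the full epoch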