Example #1
def produce():
    # filter_str, request, and MY_QUEUE are module-level globals in the source.
    with DataIter(
            '10.9.135.235',
            b'midas_ctr_pro',
            filter_str,
            request,
    ) as d:
        for i in d.get_data(batch_size=128):
            MY_QUEUE.put(i)
    MY_QUEUE.put("done")  # sentinel: tells consumers the stream is finished
Example #2
def produce(filter_str, request, train_mode):
    # "10.9.75.202" is the HBase host (an IP address).
    # b'midas_offline' is the table name; note that this one is midas offline.
    with DataIter(
            "10.9.75.202",
            b'midas_offline',
            filter_str,
            request,
            train_mode,
    ) as d:
        for i in d.get_data(batch_size=128, train_mode=train_mode):
            # Fetch continuously: each i is one batch (after the generator's
            # yield, data=[] resets for the next batch), so each queue item
            # is one batch of data.
            MY_QUEUE.put(i)
    MY_QUEUE.put("done")
Example #3
def train_model(remove_features, label_names, train_path, val_path, batch_size,
                seq_len, num_epoch, ctx, load_epoch, name):
    import os
    from test import predict, score, make_submission
    # Every '.npy' file under train_path is a feature, minus remove_features.
    data_names = [
        i[:-4] for i in os.listdir(train_path)
        if i.endswith('.npy') and i[:-4] not in remove_features
    ]
    train_iter = SeqDataIter(train_path,
                             batch_size,
                             data_names=data_names,
                             label_names=label_names,
                             shuffle=True,
                             usampling=True,
                             seq_len=seq_len)
    val_iter = SeqDataIter(val_path,
                           batch_size,
                           data_names=data_names,
                           label_names=label_names,
                           shuffle=False,
                           max_len=batch_size * 1000,
                           seq_len=seq_len)

    sym = make_network(train_iter, seq_len)
    sym = add_loss(sym)

    model = train(sym,
                  train_iter,
                  val_iter,
                  name=name,
                  load_epoch=load_epoch,
                  batch_size=batch_size,
                  exp_dir=exp_dir)  # exp_dir is a module-level global in the source

    # test_path is a module-level global in the source.
    test_iter = DataIter(test_path,
                         batch_size,
                         data_names=data_names,
                         label_names=[],
                         shuffle=False)
    score(val_iter, 7, name)
    prediction = predict(test_iter, 7, name)
    make_submission(prediction, name)
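
To make the data_names derivation in these examples concrete, a tiny standalone demonstration with invented file names:

remove_features = {'user_id'}
files = ['user_id.npy', 'item_id.npy', 'clicks.npy', 'README.md']
data_names = [
    f[:-4] for f in files
    if f.endswith('.npy') and f[:-4] not in remove_features
]
print(data_names)  # ['item_id', 'clicks']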
Example #4
def preprocess_data(remove_features, label_names, train_path, val_path,
                    preprocess_path, original_data_root):
    # parquet is pyarrow.parquet in the source; test_path is a module-level global.
    preprocess_parquet_table(
        os.path.join(original_data_root, 'collabTrain'),
        preprocess_path,
        cols=parquet.read_table(
            os.path.join(original_data_root, 'collabTrain')).to_pandas().columns)

    preprocess_parquet_table(
        os.path.join(original_data_root, 'collabTest'),
        test_path,
        cols=parquet.read_table(
            os.path.join(original_data_root, 'collabTest')).to_pandas().columns,
        params_path=preprocess_path)
    # Every '.npy' file under preprocess_path is a feature, minus remove_features.
    data_names = [
        i[:-4] for i in os.listdir(preprocess_path)
        if i.endswith('.npy') and i[:-4] not in remove_features
    ]
    data_iter = DataIter(preprocess_path,
                         100,
                         data_names=data_names,
                         label_names=label_names,
                         shuffle=True)
    # Hold out the first 1M shuffled indices for validation, the rest for training.
    train = data_iter.inx[1000000:]
    val = data_iter.inx[:1000000]
    for k, v in data_iter.data.items():
        # The original pattern '/%s.npy' began with a slash, which makes
        # os.path.join discard the directory argument; dropping the slash
        # writes the arrays into val_path/train_path as intended.
        np.save(os.path.join(val_path, '%s.npy' % k), v[val])
        np.save(os.path.join(train_path, '%s.npy' % k), v[train])
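
The join fix above matters because os.path.join discards everything before an absolute component, so the original '/%s.npy' pattern silently wrote to the filesystem root:

import os

print(os.path.join('/data/val', '/clicks.npy'))  # '/clicks.npy' -- val_path lost
print(os.path.join('/data/val', 'clicks.npy'))   # '/data/val/clicks.npy'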
Example #5
            ans[vid_list[i]] = output
    check_file.close()
    return ans


if __name__ == '__main__':

    ctx = mx.cpu()

    start_time = time()

    ans_dict = json.load(open('feature/ans_dict.json'))
    num_category = len(ans_dict)
    net = model.Net1(num_category)

    start_epoch = load_params(net, './', ctx)

    test_img = np.load('feature/test_image.npy')
    print("Total test image:", test_img.shape[0])
    test_q = np.load('feature/test_question.npy')
    print("Total test question:", test_q.shape[0])

    # Labels are unused at test time, so pass zero-filled placeholders.
    data_test = DataIter(test_img, test_q, np.zeros(test_img.shape[0] * 15))

    ans_idx = predict(net, data_test, ctx)
    print('predict result shape is: ', len(ans_idx))

    predict_time = time()

    output_data(ans_dict, ans_idx)
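
The load_params helper is not shown in the excerpt; a minimal sketch of what it might do, assuming a Gluon block and checkpoints named net-<epoch>.params (both assumptions):

import os

def load_params(net, model_dir, ctx):
    # Resume from the newest 'net-<epoch>.params' checkpoint, if any.
    ckpts = [f for f in os.listdir(model_dir)
             if f.startswith('net-') and f.endswith('.params')]
    if not ckpts:
        net.initialize(ctx=ctx)
        return 0
    last = max(int(f[len('net-'):-len('.params')]) for f in ckpts)
    net.load_parameters(os.path.join(model_dir, 'net-%d.params' % last), ctx=ctx)
    return last + 1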
Example #6
lr_stepnum = int(np.ceil(lr_stepnum))  # np.int was removed from NumPy
dlr_steps = [dlr * i for i in range(1, lr_stepnum + 1)]
print('lr_start:%.1e, lr_min:%.1e, lr_reduce:%.2f, lr_stepsnum:%d' % (
    lr_start, lr_min, lr_reduce, lr_stepnum))
# print(dlr_steps)
lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(dlr_steps, lr_reduce)

# param_prefix = 'MDL_PARAM/params5_proxy_nca-8wmargin_20180724_dim_512_bn/person_reid-back'
param_prefix = './'
load_paramidx = 0  #None

# DataBatch test
# data_path_prefix = '/train/trainset/list_clean_28w_20180803'
data_path_prefix = '/train/execute/improve_partial_fc/dataset/list_clean_28w_20180803'
data_iter = DataIter(prefix=data_path_prefix,
                     image_shapes=data_shape,
                     data_nthreads=4)

# simple DataBatch test
# data_batch = mx.random.normal(0, 1.0, shape=data_shape)
# data_label = mx.nd.array(range(64), dtype='int32')
# data_test = mx.io.DataBatch(data=[data_batch], label=[data_label])

data = mx.sym.Variable('data')
part_net = create_net(data, radius)
mxmod = mixModule(symbol=part_net,
                  context=ctx,
                  handle=handle,
                  data_shape=data_shape,
                  proxy_Z=proxy_Z,
                  K=999)
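
For reference, MultiFactorScheduler multiplies the learning rate by the factor each time num_update crosses a step boundary; a small demonstration with invented boundaries and base_lr:

import mxnet as mx

sched = mx.lr_scheduler.MultiFactorScheduler(step=[100, 200, 300], factor=0.5)
sched.base_lr = 0.1  # normally taken from the attached optimizer

for num_update in (50, 150, 250, 350):
    print(num_update, sched(num_update))
# 50 0.1, 150 0.05, 250 0.025, 350 0.0125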
Example #7

if __name__ == '__main__':
    # Please change the following accordingly
    seed = 2
    #main_path = '/home/ziqizeng/Documents/Study/CSCI599_Deep_Learning/Project/final/data/'
    step = 4
    batch_size = 5
    train_portion = 0.2
    num_workers = 2
    num_channels = 1
    class_name = ['pressure', 'velocity', 'density']

    config = Config(1e-5, 10, seed, 2)
    for i in range(1):  # note: only class_name[0] ('pressure') is processed
        data_iter = DataIter(class_name[i], shuffle_list_seed=seed)
        file_cnt = len(data_iter)
        print(file_cnt)
        indices = list(range(file_cnt))
        split = int(np.floor(file_cnt * train_portion))

        print(split)

        train_sampler = SubsetRandomSampler(indices[:split])
        test_sampler = SubsetRandomSampler(indices[split:])

        train_data = DataLoader(data_iter, batch_size=batch_size, num_workers=num_workers, pin_memory=False,
                                sampler=train_sampler)
        test_data = DataLoader(data_iter, batch_size=batch_size, num_workers=num_workers, pin_memory=False,
                               sampler=test_sampler)
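
A minimal consumption sketch for these loaders, assuming DataIter behaves as a PyTorch-style Dataset yielding (input, target) pairs and that net, criterion, and optimizer are defined elsewhere (all assumptions):

def run_epoch(net, loader, criterion, optimizer):
    # One pass over a DataLoader built above; the SubsetRandomSampler
    # restricts it to the train or test indices.
    net.train()
    for inputs, targets in loader:
        optimizer.zero_grad()
        loss = criterion(net(inputs), targets)
        loss.backward()
        optimizer.step()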