# Example no. 1 (scraped snippet header; original text: "Esempio n. 1" / "0")
        # Fragment begins mid-branch: the `if` selecting verbose mode is outside this view.
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    # Process-wide timestamped log format.
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=log_level, format=head)

    # dataset
    # `datasets` is a registry (defined elsewhere) mapping dataset name -> metadata dict.
    assert(dataset in datasets), "unknown dataset " + dataset
    metadata = datasets[dataset]
    feature_dim = metadata['feature_dim']
    # NOTE(review): `logging` here is the module object, which is always truthy,
    # so this guard can never skip the debug call — probably meant a boolean flag.
    if logging:
        logging.debug('preparing data ... ')
    # Download the LibSVM file into ./data only if it is not already present.
    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, metadata['data_name'])
    if not os.path.exists(path):
        get_libsvm_data(data_dir, metadata['data_name'], metadata['url'],
                        metadata['data_origin_name'])
        assert os.path.exists(path)

    # data iterator
    # Shard the training file across workers via num_parts/part_index;
    # rank and num_worker presumably come from the kvstore — confirm against caller.
    train_data = mx.io.LibSVMIter(data_libsvm=path, data_shape=(feature_dim,),
                                  batch_size=batch_size, num_parts=num_worker,
                                  part_index=rank)
    if dummy_iter:
        # DummyIter wraps the real iterator — presumably replays cached batches
        # for benchmarking; verify against its definition.
        train_data = DummyIter(train_data)

    # model
    model = linear_model(feature_dim)

    # module
    # Bind the symbolic linear model to the iterator's data/label shapes.
    mod = mx.mod.Module(symbol=model, data_names=['data'], label_names=['softmax_label'])
    mod.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label)
    # Unpack CLI arguments (`args` namespace is defined outside this fragment).
    num_epoch = args.num_epoch
    kvstore = args.kvstore
    batch_size = args.batch_size
    optimizer = args.optimizer

    # create kvstore
    # kvstore is optional: with none configured we run single-process (rank 0 of 1).
    kv = mx.kvstore.create(kvstore) if kvstore else None
    rank = kv.rank if kv else 0
    num_worker = kv.num_workers if kv else 1

    # dataset
    # AVAZU metadata dict (defined elsewhere) supplies feature count, file names, URL.
    # Downloads here are unconditional — get_libsvm_data is presumably a no-op when
    # the files already exist; verify against its definition.
    num_features = AVAZU['num_features']
    data_dir = os.path.join(os.getcwd(), 'data')
    train_data = os.path.join(data_dir, AVAZU['train'])
    val_data = os.path.join(data_dir, AVAZU['test'])
    get_libsvm_data(data_dir, AVAZU['train'], AVAZU['url'])
    get_libsvm_data(data_dir, AVAZU['test'], AVAZU['url'])

    # data iterator
    # `train_data` is rebound from file path to iterator. The training iterator is
    # sharded per worker; evaluation reads the full file on every worker.
    train_data = mx.io.LibSVMIter(data_libsvm=train_data,
                                  data_shape=(num_features, ),
                                  batch_size=batch_size,
                                  num_parts=num_worker,
                                  part_index=rank)
    eval_data = mx.io.LibSVMIter(data_libsvm=val_data,
                                 data_shape=(num_features, ),
                                 batch_size=batch_size)

    # model
    # The positive class weight, says how much more we should upweight the importance of
    # positive instances in the objective function.
    # (Fragment is truncated here; the weight's value and use are not visible.)
    # NOTE(review): this span near-duplicates the preceding setup — likely a
    # scraping artifact of the same example captured twice.
    # Unpack CLI arguments (`args` namespace is defined outside this fragment).
    num_epoch = args.num_epoch
    kvstore = args.kvstore
    batch_size = args.batch_size
    optimizer = args.optimizer

    # create kvstore
    # Fall back to single-worker defaults when no kvstore type is given.
    kv = mx.kvstore.create(kvstore) if kvstore else None
    rank = kv.rank if kv else 0
    num_worker = kv.num_workers if kv else 1

    # dataset
    # Download the Avazu train/test LibSVM files into ./data (unconditional calls;
    # presumably get_libsvm_data skips existing files — verify).
    num_features = AVAZU['num_features']
    data_dir = os.path.join(os.getcwd(), 'data')
    train_data = os.path.join(data_dir, AVAZU['train'])
    val_data = os.path.join(data_dir, AVAZU['test'])
    get_libsvm_data(data_dir, AVAZU['train'], AVAZU['url'])
    get_libsvm_data(data_dir, AVAZU['test'], AVAZU['url'])

    # data iterator
    # `train_data` is rebound from file path to iterator; training data is sharded
    # across workers, while evaluation reads the whole file.
    train_data = mx.io.LibSVMIter(data_libsvm=train_data, data_shape=(num_features,),
                                  batch_size=batch_size, num_parts=num_worker,
                                  part_index=rank)
    eval_data = mx.io.LibSVMIter(data_libsvm=val_data, data_shape=(num_features,),
                                 batch_size=batch_size)

    # model
    # The positive class weight, says how much more we should upweight the importance of
    # positive instances in the objective function.
    # This is used to combat the extreme class imbalance.
    positive_class_weight = 2
    model = linear_model(num_features, positive_class_weight)