    # Completes an `if` whose condition is above this chunk (presumably a
    # verbosity/debug flag — TODO confirm against the preceding lines).
    log_level = logging.DEBUG
else:
    log_level = logging.INFO
head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=log_level, format=head)
# dataset
# NOTE(review): `assert` is stripped under `python -O`; a ValueError would be
# sturdier for validating user input.
assert(dataset in datasets), "unknown dataset " + dataset
metadata = datasets[dataset]
feature_dim = metadata['feature_dim']
# NOTE(review): `if logging:` tests the module object, which is always truthy —
# the guard is a no-op; presumably `log_level` was meant here. Confirm intent.
if logging:
    logging.debug('preparing data ... ')
data_dir = os.path.join(os.getcwd(), 'data')
path = os.path.join(data_dir, metadata['data_name'])
# Download the LibSVM-format dataset only if it is not already on disk.
if not os.path.exists(path):
    get_libsvm_data(data_dir, metadata['data_name'], metadata['url'],
                    metadata['data_origin_name'])
assert os.path.exists(path)
# data iterator
# Shard the training data across workers: each of the `num_worker` parts is
# read by the worker whose kvstore rank matches `part_index`.
train_data = mx.io.LibSVMIter(data_libsvm=path, data_shape=(feature_dim,),
                              batch_size=batch_size, num_parts=num_worker,
                              part_index=rank)
# Optionally wrap the iterator so the same batch is replayed (benchmark mode).
if dummy_iter:
    train_data = DummyIter(train_data)
# model
model = linear_model(feature_dim)
# module
mod = mx.mod.Module(symbol=model, data_names=['data'],
                    label_names=['softmax_label'])
mod.bind(data_shapes=train_data.provide_data,
         label_shapes=train_data.provide_label)
# Pull the run configuration out of the parsed CLI arguments.
num_epoch = args.num_epoch
kvstore = args.kvstore
batch_size = args.batch_size
optimizer = args.optimizer

# create kvstore
# A kvstore is only created when one was requested; single-process runs
# fall back to rank 0 of a single worker.
kv = mx.kvstore.create(kvstore) if kvstore else None
if kv:
    rank = kv.rank
    num_worker = kv.num_workers
else:
    rank = 0
    num_worker = 1

# dataset
# Resolve local paths for the Avazu LibSVM files and fetch them.
num_features = AVAZU['num_features']
data_dir = os.path.join(os.getcwd(), 'data')
train_data = os.path.join(data_dir, AVAZU['train'])
val_data = os.path.join(data_dir, AVAZU['test'])
get_libsvm_data(data_dir, AVAZU['train'], AVAZU['url'])
get_libsvm_data(data_dir, AVAZU['test'], AVAZU['url'])

# data iterator
# Training batches are sharded over the workers; each worker reads its own
# part. Evaluation reads the full validation file on every worker.
train_data = mx.io.LibSVMIter(data_libsvm=train_data,
                              data_shape=(num_features,),
                              batch_size=batch_size,
                              num_parts=num_worker,
                              part_index=rank)
eval_data = mx.io.LibSVMIter(data_libsvm=val_data,
                             data_shape=(num_features,),
                             batch_size=batch_size)

# model
# The positive class weight, says how much more we should upweight the importance of
# positive instances in the objective function.
# Copy the parsed CLI arguments into local configuration names.
num_epoch = args.num_epoch
kvstore = args.kvstore
batch_size = args.batch_size
optimizer = args.optimizer

# create kvstore
# Only build a kvstore when a type was given on the command line; otherwise
# run as a single worker with rank 0.
kv = mx.kvstore.create(kvstore) if kvstore else None
if kv:
    rank = kv.rank
    num_worker = kv.num_workers
else:
    rank = 0
    num_worker = 1

# dataset
# Locate (and download if needed) the Avazu train/test files in LibSVM format.
num_features = AVAZU['num_features']
data_dir = os.path.join(os.getcwd(), 'data')
train_data = os.path.join(data_dir, AVAZU['train'])
val_data = os.path.join(data_dir, AVAZU['test'])
get_libsvm_data(data_dir, AVAZU['train'], AVAZU['url'])
get_libsvm_data(data_dir, AVAZU['test'], AVAZU['url'])

# data iterator
# The training iterator reads only this worker's shard (part_index=rank of
# num_parts); the evaluation iterator is not sharded.
train_data = mx.io.LibSVMIter(data_libsvm=train_data,
                              data_shape=(num_features,),
                              batch_size=batch_size,
                              num_parts=num_worker,
                              part_index=rank)
eval_data = mx.io.LibSVMIter(data_libsvm=val_data,
                             data_shape=(num_features,),
                             batch_size=batch_size)

# model
# The positive class weight, says how much more we should upweight the importance of
# positive instances in the objective function.
# This is used to combat the extreme class imbalance.
positive_class_weight = 2
model = linear_model(num_features, positive_class_weight)