def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--model-type', type=str, default='multiclass',
                        help='Model type')
    parser.add_argument('-d', '--model-dir', type=str,
                        help='Model\'s directory')
    parser.add_argument('-c', '--config', type=str,
                        help='Path to the config')
    args = parser.parse_args()

    model_type = args.model_type
    model_dir = args.model_dir
    config_path = args.config

    # default values for local testing
    if not model_dir and not config_path:
        model_dir = 'training/%s/local/test1' % model_type
        config_path = 'configs/%s/default.yaml' % model_type

    # train and evaluate the model
    train_model(model_type, model_dir, config_path)
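The `propagate = False` fix recurs throughout these examples: TensorFlow 1.x attaches its own handler to the `tensorflow` logger, so any record that also propagates to the root logger is printed twice. A minimal sketch of a reusable helper for this pattern (the name `enable_tf_logging` is mine, not from the examples):

import tensorflow as tf
from tensorflow.python.platform import tf_logging


def enable_tf_logging():
    """Turn on INFO-level TF logging without duplicate messages.

    TF's logger already has its own handler; stopping propagation to
    the root logger prevents each record from being emitted twice.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # pylint: disable=protected-access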
def set_logger(file_name=None):
    """
    Write logs to a file if `file_name` is given; the file handler
    must be closed by `close_logger()` after use.

    :param file_name: optional path of the log file
    :return:
    """
    # pylint: disable=no-name-in-module
    from tensorflow.python.platform.tf_logging import _get_logger
    logger = _get_logger()
    tf.logging.set_verbosity(tf.logging.INFO)
    logger.handlers = []

    # adding console output
    f = log.Formatter(CONSOLE_LOG_FORMAT)
    std_handler = log.StreamHandler(sys.stdout)
    std_handler.setFormatter(f)
    logger.addHandler(std_handler)

    if file_name:
        # adding file output
        f = log.Formatter(FILE_LOG_FORMAT)
        file_handler = log.FileHandler(file_name)
        file_handler.setFormatter(f)
        logger.addHandler(file_handler)
def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    # directory with the exported model
    saved_model_dir = root_dir('export/final_model')
    # image size that the model accepts
    image_size = 48

    # load the images from the dataset
    _, imgs = load_data()
    # get test images and crop them to the right size
    imgs = get_test_dataset(imgs, image_size)

    # load the model
    predict_fn = tf.contrib.predictor.from_saved_model(saved_model_dir)
    # get predictions
    res = predict_fn({'image': imgs})

    # print predicted spikes
    pprint(res['spikes'])
def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--model-type', type=str, default='multiclass',
                        help='Model type')
    parser.add_argument('-g', '--experiment-group', type=str, default='local',
                        help='Experiment group')
    parser.add_argument('-n', '--experiment-name', type=str, default='test1',
                        help='Experiment name')
    args = parser.parse_args()

    tune_hyperparameters(args.model_type, args.experiment_group,
                         args.experiment_name)
def main(_):
    import logging
    import sys
    from tensorflow.python.platform import tf_logging
    logging.basicConfig(level=logging.DEBUG,
                        stream=sys.stderr,
                        format='%(levelname)s '
                               '%(asctime)s.%(msecs)06d: '
                               '%(filename)s: '
                               '%(lineno)d '
                               '%(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    tf_logger = tf_logging._get_logger()
    tf_logger.propagate = False

    print_flags()

    if not tf.gfile.Exists(FLAGS.eval_log_dir):
        tf.gfile.MakeDirs(FLAGS.eval_log_dir)

    dataset = datasets.create_dataset()
    model = models.create_model(num_char_classes=dataset.num_char_classes,
                                max_seq_len=dataset.max_seq_len,
                                null_code=dataset.null_code)

    data = data_loader.get_data(dataset)
    endpoints = model.create_base(data.images, is_training=False)
    eval_ops, prediction, label = model.create_eval_ops(data, endpoints)

    tf.train.get_or_create_global_step()
    session_config = tf.ConfigProto(allow_soft_placement=True)
    session_config.gpu_options.allow_growth = True

    if FLAGS.eval_type == 'once':
        slim.evaluation.evaluate_once(master=FLAGS.master,
                                      checkpoint_path=FLAGS.ckpt_path,
                                      logdir=FLAGS.eval_log_dir,
                                      num_evals=FLAGS.num_batches,
                                      eval_op=eval_ops,
                                      session_config=session_config)
    elif FLAGS.eval_type == 'loop':
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.train_log_dir,
            logdir=FLAGS.eval_log_dir,
            eval_op=eval_ops,
            num_evals=FLAGS.num_batches,
            eval_interval_secs=FLAGS.eval_interval_secs,
            max_number_of_evaluations=FLAGS.number_of_steps,
            timeout=2000,
            session_config=session_config)
    else:
        pass
def wrap_tf_logger(self, task_name):
    tf_logger = tf_logging._get_logger()
    formatter = logging.Formatter('%(asctime)s - %(message)s')
    fh = logging.FileHandler(self.loc(task_name))
    ch = logging.StreamHandler()
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    tf_logger.handlers = [fh, ch]
    tf_logger.addFilter(TFFilter())
    tf_logger.propagate = False
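`TFFilter` is not shown in this example. A minimal sketch of what such a filter might look like, assuming its job is to drop noisy low-level records (this implementation is a guess, not the original):

import logging


class TFFilter(logging.Filter):
    """Hypothetical filter that drops noisy TensorFlow log records."""

    def filter(self, record):
        # keep INFO and above, drop DEBUG chatter
        return record.levelno >= logging.INFO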
def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--model-type', type=str, default='multiclass',
                        help='Model type')
    parser.add_argument('-d', '--model-dir', type=str, required=True,
                        help='Model\'s directory')
    parser.add_argument('-n', '--submission-num', type=int, required=True,
                        help='Submission number')
    args = parser.parse_args()

    # generate submission and copy the model
    generate_submission(args.model_type, args.model_dir, args.submission_num)
def main(_):
    print_flags()

    import logging
    import sys
    from tensorflow.python.platform import tf_logging
    logging.basicConfig(level=logging.DEBUG,
                        stream=sys.stderr,
                        format='%(levelname)s '
                               '%(asctime)s.%(msecs)06d: '
                               '%(filename)s: '
                               '%(lineno)d '
                               '%(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    tf_logger = tf_logging._get_logger()
    tf_logger.propagate = False

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpus

    prepare_training_dir()

    dataset = datasets.create_dataset()
    model = models.create_model(
        num_char_classes=dataset.num_char_classes,  # represents `num_labels + 1` classes
        max_seq_len=dataset.max_seq_len,
        null_code=dataset.null_code)
    hparams = get_training_hparams()

    # If ps_tasks is zero, the local device is used. When using multiple
    # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
    # across the different devices.
    device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks,
                                                   merge_devices=True)
    with tf.device(device_setter):
        data = data_loader.get_data(dataset)
        endpoints = model.create_base(data.images, is_training=True)
        total_loss = model.create_loss(data, endpoints)
        init_fn = model.create_init_fn(FLAGS.checkpoint)
        # print(tf.trainable_variables('CRNN'))
        if FLAGS.show_graph_stats:
            logging.info('Total number of weights in the graph: %s',
                         profile_graph())
        train(total_loss, init_fn, hparams)
def initLogging(verbosity=0):
    """Setup logging with a given verbosity level"""
    # tensorflow logging is a mess, disable the default handler
    # or it will dupe every log
    from tensorflow.python.platform import tf_logging
    tf_logger = tf_logging._get_logger()
    tf_logger.handlers = []

    # import logging.config
    # logging.config.fileConfig('logging_config.ini', disable_existing_loggers=False)
    logging.basicConfig()
    if verbosity == 0:
        logging.root.setLevel(logging.WARN)
    if verbosity == 1:
        logging.root.setLevel(logging.INFO)
    if verbosity > 1:
        logging.root.setLevel(logging.DEBUG)
def close_logger():
    """
    Close file-based outputs
    :return:
    """
    # pylint: disable=no-name-in-module
    from tensorflow.python.platform.tf_logging import _get_logger
    logger = _get_logger()
    for handler in reversed(logger.handlers):
        try:
            handler.flush()
            handler.close()
            logger.removeHandler(handler)
        except (OSError, ValueError):
            pass
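A typical use of this pair, as the `set_logger` docstring suggests: open the file-backed logger for the duration of a run, then tear it down. A minimal sketch (the log path is illustrative):

set_logger('training/run1/train.log')  # console + file output
try:
    tf.logging.info('training started')
    # ... training loop ...
finally:
    close_logger()  # flush and close the file handler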
def setup_tensorflow(device: Union[str, int, Sequence[int], Sequence[str]],
                     allow_growth: bool):
    """Setup tensorflow session according to gpu configuration.

    Args:
        device (Union[str, int, Sequence[int], Sequence[str]]): GPU or list of GPUs to run on
        allow_growth (bool): Whether to capture all memory on gpu or grow as necessary

    Returns:
        sess (tf.Session): Tensorflow Session object as the default session
    """
    if isinstance(device, int):
        device = str(device)
    elif isinstance(device, list):
        device = ', '.join([str(d) for d in device])
    elif not isinstance(device, str):
        raise ValueError(
            "Unrecognized device type. Expected int, str, or list. "
            "Received {}.".format(type(device)))

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = device
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"  # disable tensorflow info logging
    tf.logging.set_verbosity(tf.logging.WARN)

    from tensorflow.python.platform import tf_logging
    try:
        # tensorflow == 1.13
        tf_logging.get_logger().propagate = False
    except AttributeError:
        # tensorflow <= 1.12
        tf_logging._get_logger().propagate = False

    gpu_options = tf.GPUOptions(allow_growth=allow_growth)
    conf = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    sess = tf.get_default_session()
    if sess is None:
        sess = tf.Session(config=conf)
        sess.__enter__()  # type: ignore
    np.set_printoptions(suppress=True)
    return sess
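A hedged usage sketch for this helper: run on GPU 0 with memory growth enabled (the device ID is illustrative):

sess = setup_tensorflow(device=0, allow_growth=True)
# the returned session has already been installed as the default one
print(tf.get_default_session() is sess)  # True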
def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--model-type', type=str, default='multiclass',
                        help='Model type')
    parser.add_argument('-d', '--model-dir', type=str, required=True,
                        help='Model\'s directory')
    parser.add_argument('-s', '--checkpoint-step', type=int,
                        help='Export the model from the checkpoint taken on '
                             'the specified global step.')
    args = parser.parse_args()

    # load the model config
    config = load_model_config(args.model_dir)
    # create the model builder
    model_builder = create_builder(args.model_type, config)
    # export the model
    export(model_builder, args.model_dir, args.checkpoint_step)
def get_logger(logger_name=None, log_file: Path = None, level=logging.DEBUG):
    # "log/data-pipe-{}.log".format(datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    if logger_name is None:
        logger = tf_logging._get_logger()
    else:
        logger = logging.getLogger(logger_name)

    if not logger.hasHandlers():
        formatter = logging.Formatter(LOG_FORMAT)
        logger.setLevel(level)
        if log_file is not None:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            fileHandler = logging.FileHandler(log_file, mode="w")
            fileHandler.setFormatter(formatter)
            logger.addHandler(fileHandler)
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)
        logger.addHandler(streamHandler)
    return logger
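A sketch of calling this helper for a named pipeline logger with a file sink (the logger name and path are illustrative):

from pathlib import Path

log = get_logger('data-pipe', log_file=Path('log/data-pipe.log'))
log.info('pipeline started')  # goes to both the console and the file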
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

from __future__ import absolute_import, division, print_function

import argparse
import json
import logging as _logging
import numpy as np
import os
import sys as _sys

import tensorflow as tf
from tensorflow.python.platform import tf_logging

tf.logging.set_verbosity(tf.logging.DEBUG)
_handler = _logging.StreamHandler(_sys.stdout)
tf_logger = tf_logging._get_logger()
tf_logger.handlers = [_handler]


def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
    # Input Layer
    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
    # MNIST images are 28x28 pixels, and have one color channel
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional Layer #1
    # Computes 32 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 28, 28, 1]
    # Output Tensor Shape: [batch_size, 28, 28, 32]
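The example is cut off right where these comments end. For reference, a hedged sketch of the layer they describe, using the TF 1.x `tf.layers` API (my reconstruction, not necessarily the original's continuation):

# Hypothetical continuation (not from the original snippet): a 5x5 conv
# with 32 filters and "same" padding keeps the 28x28 spatial size.
conv1 = tf.layers.conv2d(
    inputs=input_layer,
    filters=32,
    kernel_size=[5, 5],
    padding="same",
    activation=tf.nn.relu)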
def main():
    parser = argparse.ArgumentParser(
        description='Petastorm/Sagemaker/Tensorflow MNIST Example')

    # Data, model, and output directories.
    # model_dir is always passed in from SageMaker. By default this is a S3
    # path under the default bucket.
    parser.add_argument('--model_dir', type=str)
    parser.add_argument('--sm-model-dir', type=str,
                        default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str,
                        default=os.environ.get('SM_CHANNEL_TRAINING'))
    parser.add_argument('--hosts', type=list,
                        default=json.loads(os.environ.get('SM_HOSTS')))
    parser.add_argument('--current-host', type=str,
                        default=os.environ.get('SM_CURRENT_HOST'))
    parser.add_argument('--dataset-url', type=str, metavar='S',
                        help='S3:// URL to the MNIST petastorm dataset')
    parser.add_argument('--training_steps', type=int, default=300)
    parser.add_argument('--evaluation_steps', type=int, default=10)
    parser.add_argument('--log_step_count_steps', type=int, default=100)
    parser.add_argument('--save_checkpoints_steps', type=int, default=500)
    parser.add_argument('--save_summary_steps', type=int, default=50)
    parser.add_argument('--throttle_secs', type=int, default=10)
    parser.add_argument('--prefetch_size', type=int, default=16)
    parser.add_argument('--num_parallel_batches', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=256)
    args = parser.parse_args()

    tf.logging.set_verbosity(tf.logging.DEBUG)
    # TF 1.13 and 1.14 handle logging a bit differently, so the logging setup
    # is wrapped in a try/except block
    try:
        tf_logger = tf_logging._get_logger()
        handler = tf_logger.handlers[0]
        handler.setFormatter(
            _logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    except (AttributeError, IndexError):
        pass

    # In 1.14, a multi-worker synchronous training can be achieved using
    # CollectiveAllReduceStrategy, see
    # https://github.com/tensorflow/tensorflow/issues/23664
    # Without providing train_distribute, I believe asynchronous training is done
    run_config = tf.estimator.RunConfig(
        save_checkpoints_steps=args.save_checkpoints_steps,
        log_step_count_steps=args.log_step_count_steps,
        save_summary_steps=args.save_summary_steps,
    )

    model_dir_parent_path = args.model_dir[:-5]
    model_dir_parent = model_dir_parent_path.split("/")[-2]
    print(f"Launch tensorboard by running the following in terminal:\n" +
          f"aws s3 sync {model_dir_parent_path} ~/Downloads/{model_dir_parent} && " +
          f"tensorboard --logdir=~/Downloads/{model_dir_parent}")

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=args.model_dir,
                                       params={"batch_size": args.batch_size},
                                       config=run_config)

    workers = json.loads(os.environ['SM_HOSTS'])
    worker_index = workers.index(os.environ['SM_CURRENT_HOST'])
    nr_workers = len(workers)
    print(f"Inside training script on worker with (0-based) index "
          f"{worker_index} out of {nr_workers - 1}.")

    with make_reader(os.path.join(args.dataset_url, 'train'),
                     num_epochs=None,
                     cur_shard=worker_index,
                     shard_count=nr_workers,
                     workers_count=nr_workers) as train_reader:
        with make_reader(os.path.join(args.dataset_url, 'test'),
                         num_epochs=None,
                         cur_shard=0,
                         shard_count=1) as eval_reader:
            train_fn = lambda: _input_fn(
                reader=train_reader,
                batch_size=args.batch_size,
                num_parallel_batches=args.num_parallel_batches)
            eval_fn = lambda: _input_fn(
                reader=eval_reader,
                batch_size=args.batch_size,
                num_parallel_batches=args.num_parallel_batches)

            train_spec = tf.estimator.TrainSpec(input_fn=train_fn,
                                                max_steps=args.training_steps)
            eval_spec = tf.estimator.EvalSpec(input_fn=eval_fn,
                                              throttle_secs=args.throttle_secs,
                                              steps=args.evaluation_steps)
            tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
def _main():
    # commandline argument parser
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Random Forest Classifier based on Tensorflow (TM)',
        epilog=textwrap.dedent('''\
            Notes:
              You *MUST* specify at least one of the -r, -t or -x options

            Examples:
              - Simple training and testing:
                  %(prog)s -t cat1.fits -x cat2.fits -c "Class"
              - Train the classifier and save the trained model:
                  %(prog)s --train traincat.fits --model-dir ./mymodel/
              - Load a saved model and run it on a catalog:
                  %(prog)s --model-dir ./mymodel --run mycat.fits
              - Train the classifier with a timeout of 1 day and 6 hours:
                  %(prog)s --train traincat.votable --train_timeout 1d6h
                  %(prog)s --train traincat.votable --train_timeout 1.25d
                  %(prog)s --train traincat.votable --train_timeout 30h
              - Train the classifier with a loss threshold of 0.001:
                  %(prog)s --train train.csv --loss-treshold 0.001

            For more info import the classtf module in python and run

              >>> help(classtf)
            '''))
    parser.add_argument('-r', '--run', type=str, action='store',
                        metavar='FILE', dest='run_filename',
                        help="If specified, the classifier is run using "
                             "the dataset %(metavar)s as input")
    parser.add_argument('-t', '--train', type=str, action='store',
                        metavar='FILE', dest='train_filename',
                        help="If specified, the classifier is trained using "
                             "the dataset %(metavar)s")
    parser.add_argument('-x', '--test', type=str, action='store',
                        metavar='FILE', dest='test_filename',
                        help="If specified, the classifier is tested using "
                             "the dataset %(metavar)s")
    parser.add_argument('-f', '--feature-importance', action='store_true',
                        dest='do_feature_importance',
                        help="If specified, the importance of each feature "
                             "is computed. Can be used only if both --train "
                             "and --test options are specified")
    parser.add_argument('--loss-treshold', type=float, action='store',
                        metavar='VALUE', dest='loss_treshold', default=0.001,
                        help="If specified, the training will stop when the "
                             "loss change between two cycles becomes smaller "
                             "than %(metavar)s (or when the training timeout "
                             "expires). This option has effect only when the "
                             "option --train is specified and is ignored "
                             "otherwise. If not specified, the default value "
                             "of 0.001 is used.")
    parser.add_argument('--train-timeout', type=str, action='store',
                        metavar='TIME_INTERVAL', dest="train_timeout",
                        help="If specified, set the maximum execution time "
                             "for the training process. %(metavar)s must be "
                             "a string representing a time interval. Allowed "
                             "units are y (years), d (days), h (hours), "
                             "m (minutes), s (seconds) [i.e. 1y2d13h20m13.3s]. "
                             "If not specified, no timeout is applied")
    parser.add_argument('-c', '--target', type=str, action='store',
                        metavar='TARGET_FEATURE_ID', dest='class_target',
                        default=-1,
                        help="Set the name or column index of the feature "
                             "used as target class during training and "
                             "testing. If not specified, the last column in "
                             "the dataset is used as default")
    parser.add_argument('--ignore-features', type=str, action='store',
                        metavar='', dest='skip_list', nargs='+',
                        help="List of features that should be ignored")
    parser.add_argument('-d', '--model-dir', type=str, action='store',
                        metavar='MODEL_DIR', dest='model_dir',
                        help="If specified, the trained model is saved to or "
                             "restored from %(metavar)s")
    parser.add_argument('-n', '--trees', type=int, action='store',
                        metavar='NUM_OF_TREES', dest='num_trees', default=1000,
                        help="If specified, set the number of generated trees "
                             "to %(metavar)s. Otherwise fall back to the "
                             "default value of 1000 trees")
    parser.add_argument('-b', '--batch-size', type=int, action='store',
                        metavar='BATCH_SIZE', dest='batch_size', default=4096,
                        help="If specified, set the size of the batch to "
                             "%(metavar)s, which is the number of objects "
                             "used at once during a training/test/run cycle. "
                             "The default value is 4096")
    parser.add_argument('--depth', type=int, action='store',
                        metavar='NUM_NODES', dest='max_nodes', default=10000,
                        help="If specified, set the maximum number of nodes "
                             "created by the model to %(metavar)s. "
                             "The default value is 10000")
    parser.add_argument('-v', '--version', action='store_true',
                        dest='show_version',
                        help="Print the program version and exit")

    args = parser.parse_args()
    print("")

    if args.show_version:
        print("classtf - random forest classifier")
        print("version {0:d}.{1:d}.{2:d}".format(VERSION_MAJ, VERSION_MIN,
                                                 VERSION_REV))
        print("")
        sys.exit(0)

    do_train = _check_file(args.train_filename)
    do_test = _check_file(args.test_filename)
    do_run = _check_file(args.run_filename)

    if args.do_feature_importance and (not do_test or not do_train):
        print(MSG_ERR_FIMPO)
        sys.exit(1)

    if not (do_run or do_test or do_train):
        parser.print_help()
        sys.exit(1)

    # Preliminary sanity checks
    if not args.model_dir:
        # if no model directory is specified then just create
        # a temporary directory
        tmp_dir = tempfile.TemporaryDirectory(prefix="rf-model-")
        model_dir = tmp_dir.name
    else:
        model_dir = args.model_dir

    # If we have specified the model directory but we are not performing a
    # training, then we want to load the model we saved earlier. In this
    # case let's check that it really exists
    if not os.path.isdir(model_dir):
        if not args.train_filename:
            # no training... bailing out
            print(MSG_ERR_DIR_NOT_FOUND.format(model_dir))
            sys.exit(1)
        elif not os.path.exists(model_dir):
            # the directory does not exist, let's try to create it
            try:
                os.makedirs(model_dir)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    # If somehow the directory has been created after the
                    # elif check, just ignore the error: it exists and
                    # that's what matters
                    print(MSG_ERR_DIR_PERM.format(model_dir))
                    sys.exit(1)
        else:
            # The path exists but is not a directory... bailing out
            print(MSG_ERR_DIR_NOT_FOUND.format(model_dir))
            sys.exit(1)

    # NOTE: there is a bug in tensorflow version 1.12.0 that floods the
    # console with warnings. They do not affect the execution, but are
    # quite annoying. Let's hijack them to a log file...
    logfile = os.path.join(model_dir, 'log.txt')
    tf_logger = tf_logging._get_logger()
    tf_logger.removeHandler(tf_logger.handlers[0])
    tf_logger.addHandler(logging.FileHandler(logfile))

    train_timeout = timestr2sec(args.train_timeout)

    # Loading actual data
    if args.run_filename:
        x_data, _ = readinput(args.run_filename, None, args.skip_list)

    #
    # Here the actual program starts
    #

    # Let's create a default tensorflow session
    with tf.Session() as sess:
        ap.conf.max_lines = -1
        ap.conf.max_width = -1
        rf = RFModel(args.num_trees, args.max_nodes, tf_session=sess,
                     loss_treshold=args.loss_treshold,
                     batch_size=args.batch_size,
                     train_timeout=train_timeout)
        print("")
        print(MSG_INFO_MODELDIR.format(model_dir))
        print("")

        if do_train:
            print("Reading train dataset...")
            x_train, y_train, x_names, y_names = readinput(
                args.train_filename, args.class_target, args.skip_list,
                make_shadows=args.do_feature_importance)
            t_start = time.time()
            c_t_start = time.ctime()
            rf.train(x_train, y_train, model_dir)
            c_t_end = time.ctime()
            delta_t = datetime.timedelta(seconds=time.time() - t_start)
            with open(args.train_filename + '-train_log.txt', 'w') as outf:
                outf.write("Training starts at {}\n".format(c_t_start))
                outf.write(LOG_MODEL_INFO.format(rf))
                outf.write("\nFeatures: {}\n".format(x_names))
                outf.write("\nTarget class: {}\n".format(y_names))
                outf.write("Training ends at {}\n".format(c_t_end))
                outf.write("Elapsed time: {}\n".format(delta_t))
            del x_train
            del y_train
            print("")

        if do_test:
            print("Reading test dataset...")
            x_test, y_test, x_names, y_names = readinput(
                args.test_filename, args.class_target, args.skip_list,
                make_shadows=args.do_feature_importance)
            t_start = time.time()
            c_t_start = time.ctime()
            pred, cm = rf.test(x_test, y_test, model_dir)
            c_t_end = time.ctime()
            delta_t = datetime.timedelta(seconds=time.time() - t_start)

            t = Table.read(args.test_filename)
            newcol_name = 'PRED_CLASS'
            while newcol_name in t.colnames:
                newcol_name = '_' + newcol_name
            # add the predicted classes as last column in the dataset
            newcol = Column(pred, name=newcol_name)
            t.add_column(newcol)
            # save the test output
            catname = os.path.splitext(args.test_filename)[0] + '-test_output.fits'
            t.write(catname, format='fits')
            del t

            print("\nConfusion matrix:")
            print(cm)
            base_scores = score(cm)
            base_f1 = np.mean(base_scores, axis=0)[-1]

            # write some info in the test log
            with open(args.test_filename + '-test_log.txt', 'w') as outf:
                outf.write("Test starts at {}\n".format(c_t_start))
                outf.write(LOG_MODEL_INFO.format(rf))
                outf.write("\nFeatures: {}\n".format(x_names))
                outf.write("\nTarget class: {}\n".format(y_names))
                outf.write('CONFUSION MATRIX\n')
                outf.write(str(cm).replace('[', ' ').replace(']', ''))
                print("\n precision recall f1-score")
                outf.write("\n\n precision recall f1-score\n")
                fmt_str = "class {0:d}: {1: 8.3f} {2: 8.3f} {3: 8.3f}"
                for stats in base_scores:
                    print(fmt_str.format(*stats))
                    outf.write(fmt_str.format(*stats) + '\n')
                print("")
                outf.write("Test ends at {}\n".format(c_t_end))
                outf.write("Elapsed time: {}\n".format(delta_t))

            if args.do_feature_importance:
                num_features = x_test.shape[-1]
                t_start = time.time()
                c_t_start = time.ctime()
                imp_table = Table(
                    names=['ID', 'FEATURE', 'MDA', 'Z-SCORE', 'IMPORTANCE'],
                    dtype=['uint8', 'S32', 'float32', 'float32', 'float32'])
                imp_table['ID'].format = 'd'
                imp_table['FEATURE'].format = '>s'
                imp_table['MDA'].format = ' 10.6f'
                imp_table['Z-SCORE'].format = ' 10.6f'
                imp_table['IMPORTANCE'].format = ' 10.6f'

                # Using the Boruta algorithm
                print("Feature importance analysis...")
                for i in range(num_features):
                    print("Feature {} of {}: ".format(i + 1, num_features))
                    # backup the column data
                    orig_col = x_test[..., i].copy()
                    # shuffle the i-th column
                    np.random.shuffle(x_test[..., i])
                    # compute the new confusion_matrix
                    _, ith_cm = rf.test(x_test, y_test, model_dir)
                    # restore the column
                    x_test[..., i] = orig_col.copy()
                    del orig_col
                    # compute the average score
                    ith_scores = score(ith_cm)
                    m_f1 = np.mean(ith_scores, axis=0)[-1]
                    imp_table.add_row([i, x_names[i], base_f1 - m_f1, 0, 0])
                c_t_end = time.ctime()
                delta_t = datetime.timedelta(seconds=time.time() - t_start)

                # computing the Z-score
                imp_table['Z-SCORE'] = imp_table['MDA']
                imp_table['Z-SCORE'] -= imp_table['MDA'].mean()
                imp_table['Z-SCORE'] /= imp_table['MDA'].std()

                # Finding the Maximum Z Shadow Accuracy
                MSZA = max(
                    x['Z-SCORE'] for x in imp_table
                    if x['FEATURE'].startswith('__shadow_')
                )
                imp_table['IMPORTANCE'] = imp_table['Z-SCORE'] / MSZA

                print("\nAnalysis results:")
                imp_table.sort('IMPORTANCE')
                imp_table.reverse()
                imp_table.pprint(align=['>', '>', '>', '>', '>'])
                with open(args.test_filename + '-fimportance.txt', 'w') as outf:
                    outf.write("FI starts at {}\n\n".format(c_t_start))
                    outf.write(str(imp_table))
                    outf.write("\n\nFI ends at {}\n".format(c_t_end))
                    outf.write("Elapsed time: {}\n".format(delta_t))

            del x_test
            del y_test
            print("")

        if do_run:
            print("Reading input dataset...")
            x_data, _, x_names, _ = readinput(
                args.run_filename, None, args.skip_list)
            pred = rf.run(x_data, model_dir)
            del x_data

            # Read the original file
            t = Table.read(args.run_filename)
            # Check if the name is already used and if this is the case
            # then use a different name
            newcol_name = 'PRED_CLASS'
            while newcol_name in t.colnames:
                newcol_name = '_' + newcol_name
            # add the predicted classes as last column in the dataset
            newcol = Column(pred, name=newcol_name)
            t.add_column(newcol)
            catname = os.path.splitext(args.run_filename)[0] + '-rfout.fits'
            t.write(catname, format='fits')
#!/mnt/data/anaconda3-cpu-mkl/bin/python

import pandas as pd
import tensorflow as tf
import numpy as np
import math
import time

from tensorflow.python.platform import tf_logging as logging

logging._get_logger().setLevel(logging.INFO)

start = time.clock()

# ### parameters to adjust:

# In[ ]:

hidden_units = [128, 64, 32]
learning_rate = 0.001
batch_size = 2000
num_epochs = 50
l1_regularization_strength = 0.001
hash_bucket_size = 200

#filenames = ["./ext_1.csv"]
filenames = ["hdfs://192.168.1.2:4545/census_extended/ext_1.csv"]
# to fetch feature names/dtypes and calculate mean & std for categorical columns
training_data_pandas = "sample.csv"
target = 'income'
delim = ','
label_vocabulary = ["<=50K", ">50K"]
flags.DEFINE_boolean('eval_best_model', False, '')
flags.DEFINE_float('min_visl_detection_score', 0.05, '')
flags.DEFINE_boolean('run_once', False, '')
flags.DEFINE_boolean('eval_coco_on_voc', False, '')
flags.DEFINE_string('shard_indicator', '', '')
flags.DEFINE_string('input_pattern', '', '')

FLAGS = flags.FLAGS

try:
    logging._get_logger().propagate = False
except AttributeError:
    pass

STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
    'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
    'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk',
    'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki',
    'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise',
    'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick',
    'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite',
    'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed',
    'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen',
    'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan',
    'LightGoldenRodYellow', 'LightGray', 'LightGrey', 'LightGreen',
    'LightPink', 'LightSalmon',
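The `try/except AttributeError` guard here (and in the `setup_tensorflow` example above) handles the `_get_logger` → `get_logger` rename across TF 1.x releases. A small sketch of a helper that hides this difference (the name `compat_get_logger` is mine):

from tensorflow.python.platform import tf_logging


def compat_get_logger():
    """Return TF's logger across 1.x versions.

    TF 1.13+ exposes get_logger(); older releases only have the
    private _get_logger().
    """
    try:
        return tf_logging.get_logger()
    except AttributeError:
        return tf_logging._get_logger()


compat_get_logger().propagate = False  # same effect, version-independent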