Example #1
def run():
	try:
		paths_list = get_paths('20170501')
		i = 0
		while i < 100:
			# df = pd.read_csv(paths_list[i], sep='|', names=['user_id', 'ts', 'rssi', 'AP'])
			# df = get_df_with_index(df, df['ts'])
			# df_0 = df[df['AP'] == '14E4E6E186A4']
			# df_1 = df[df['AP'] == 'EC172FE3B340']
			# print("df: %s, df_0: %s, df_1: %s" % (df, df_0, df_1))
			# i += 1
			# plt.plot(df_0['rssi'])
			# plt.plot(df_1['rssi'])
			# plt.xlabel('time')
			# plt.ylabel('rssi')
			# plt.show()

			with open(paths_list[i], 'r') as fr:
				length = len(fr.readlines())
				date = get_date(paths_list[i])
				user_id = get_uid(paths_list[i])
			with open(paths_list[i], 'r') as fr:
				time_slices_list = []
				prev_line = fr.readline()
				prev_list = prev_line.split("|")
				user_id = prev_list[0]
				prev_ts = int(prev_list[1])
				prev_rssi = int(prev_list[2])
				prev_AP = prev_list[-1].strip()
				time_slices_list.append((prev_ts, prev_rssi, prev_AP))
				i += 1
				j = 1
				while j < length:
					j += 1
					cur_line = fr.readline()
					cur_list = cur_line.split("|")
					cur_ts = int(cur_list[1])
					cur_rssi = int(cur_list[2])
					cur_AP = cur_list[-1].strip()
					if cur_ts - prev_ts <= 120:
						time_slices_list.append((cur_ts, cur_rssi, cur_AP))
					else:
						df = pd.DataFrame(time_slices_list)
						df = get_df_with_index(df, df[0])
						df_0 = df[df[2] == '14E4E6E186A4']
						df_1 = df[df[2] == 'EC172FE3B340']
						print("df: %s, df_0: %s, df_1: %s, user_id: %s" % (df, df_0, df_1, user_id))
						plt.plot(df_0[1])
						plt.plot(df_1[1])
						plt.title(user_id)
						plt.xlabel('time')
						plt.ylabel('rssi')
						plt.show()
						time_slices_list[:] = []
						time_slices_list.append((cur_ts, cur_rssi, cur_AP))
					prev_ts = cur_ts
	except Exception as e:
		raise e
Example #2
    def make_paths(self):

        if not self.paths:
            self.paths = get_paths(self.database.graph, self.name)

        if not self.local_tables:
            local_tables, one_to_many_tables = make_local_tables(self.paths)
            self.local_tables = local_tables
            self.one_to_many_tables = one_to_many_tables

            self.table_path_list = create_table_path_list(self.paths)
            self.table_path = create_table_path(self.table_path_list, self.name)
Example #3
def main():
  prtime('starting lgb.py PREPROCESS_VERSION =', PREPROCESS_VERSION, 'OUTPUT_VERSION = ', OUTPUT_VERSION)
  IDIR, ODIR = get_paths()
  train = read32(ODIR + 'train_updated_v'+str(PREPROCESS_VERSION)+'.csv')
  prtime('train reading done')
  gc.collect()
#  if LOG:
#    train.Value = np.log1p(train.Value)
  prtime('reading submission_format')
  submission_format = read32(ODIR + 'submission_format_updated_v'+str(PREPROCESS_VERSION)+'.csv')
  metadata = pd.read_csv(IDIR + 'metadata.csv').set_index('SiteId')

  prtime('generating features for submission')

  train = get_static_features(train, metadata)
  train = get_ratio_features(train)

  submission_format = get_static_features(submission_format, metadata)
  submission_format = get_ratio_features(submission_format)

  print(train.dtypes)
  print(train.memory_usage())
  print(submission_format.dtypes)
  print(submission_format.memory_usage())


  submission_frequency = pd.read_csv(IDIR + 'submission_frequency.csv')
#  submission_updated = pd.read_csv(ODIR + 'submission_updated.csv')
  train.Temperature.fillna(np.nanmedian(train.Temperature), inplace = True)
  submission_format.Temperature.fillna(np.nanmedian(train.Temperature), inplace = True)

  freqs = [900000000000, 3600000000000, 86400000000000]
  seeds = [14,15,16,17,18]


#  seeds = [14,15,16,17,18]*100
#  freqs = [86400000000000]

  best_losses = pd.DataFrame(columns = ['freq', 'seed', 'single', 'blended'], dtype = np.float32)

  for seed in seeds:
    if os.path.isfile(ODIR + 'lw'+str(OUTPUT_VERSION)+'_submission_lgb_'+str(seed)+'.csv'):
      print('skipping training for seed', seed,' file already exists')
      continue
    for freq in freqs:
      best_loss = tune_params(train, submission_format, submission_frequency, freq, n_attempts = NUM_ATTEMPTS, random_seed = seed)
      best_losses = best_losses.append({'freq' : freq, 'seed' : seed, 'single' : best_loss['single'], 'blended' : best_loss['blended']}, ignore_index = True)
      print('best losses so far = ', best_losses)
      print(best_losses.groupby('freq')['single','blended'].mean())
#      print('last 5 losses mean : ', best_losses.iloc[-5:]['blended'].mean())
  filenames = [ODIR + 'lw'+str(OUTPUT_VERSION)+'_submission_lgb_'+str(seed)+'.csv' for seed in seeds]
  average(filenames)   
Example #4
import pandas as pd
import numpy as np
from util import get_paths, prtime
import multiprocessing
from multiprocessing import Process, Queue
import time
import traceback
import sys
import gc
import lgb
import os
from collections import OrderedDict

IDIR, ODIR = get_paths()
VERSION = 12  # Version of preprocessed files, used in output file names like 'train_updated_vNN.csv'

# Number of nan values in an array


def my_nancount(a):
    return np.sum(np.isnan(a))


#  Calculating historical aggregates
#  df - source dataframe (train set)
#  TestTimestamp - start of test period; no data at this point or beyond is used
#  period - amount of time before TestTimestamp used to calculate aggregates
#  target col - column to calculate averages for (can be Value, Temperature, ...)
#  cols - columns to group by (i.e. we get aggregate values for the same values in these columns in the past)
#  col_values - current values in these columns (for example, current time and day of week)
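
# The excerpt ends before the function body. The following is only a minimal sketch
# of a historical-aggregate helper consistent with the parameters described above;
# the name, the 'Timestamp' column, and the logic are assumptions, not the original
# implementation.
def get_historical_aggregate(df, TestTimestamp, period, target_col, cols, col_values):
    # Keep only rows strictly before the test period and within the look-back window.
    window = df[(df['Timestamp'] < TestTimestamp) &
                (df['Timestamp'] >= TestTimestamp - period)]
    # Keep only rows whose group-by columns match the current values.
    for col, val in zip(cols, col_values):
        window = window[window[col] == val]
    # Average of the target column over the matching history (NaN if no rows match).
    return window[target_col].mean()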
Example #5
This module takes in numpy arrays of the B-Tax final and intermediate
calculations and then puts them into Pandas Dataframes in a format suitable
for tabular representation in the web app.
Last updated: 8/2/2016.

"""
# Import packages
import os.path
import sys
import pandas as pd
import numpy as np
import cPickle as pickle
from util import get_paths, read_from_egg


globals().update(get_paths())


def CBO_compare(vars_by_asset):
    """Function to compare B-Tax output to CBO calcuations

        :param user_params: The user input for implementing reforms
        :type user_params: dictionary
        :returns: METR (by industry and asset) and METTR (by asset)
        :rtype: DataFrame
    """
    # read in CBO file
    CBO_data = pd.read_excel(os.path.join(_REF_DIR, 'effective_taxrates.xls'),
        sheetname='Full detail', header=1, skiprows=0, skip_footer=8)
    CBO_data.columns = [col.encode('ascii', 'ignore') for col in CBO_data]
    CBO_data.rename(columns = {'Top page (Rows 3-35): Equipment        Bottom page (Rows 36-62): All Other ':'Asset Type'}, inplace = True)
Example #6
def infer():
    """
    Main method. For the paths specified in input_paths, computes predictions
    and then saves them.
    input_paths can be a list of paths, a path to a directory of x files,
    or a path to a CSV file with one path per line.
    """
    if conf["rand_seed"] is not None:
        random.seed(conf["rand_seed"])

    #parsing possible command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input_paths",
        type=str,
        nargs="?",
        help="path to CSV list of input paths or input file or dir with files",
        default=conf["input_paths"])
    parser.add_argument("--output_dir_path",
                        type=str,
                        nargs="?",
                        help="path to directory to save predictions",
                        default=conf["output_dir_path"])
    parser.add_argument(
        "--model_path",
        type=str,
        nargs="?",
        help="path directory containing meta-graph and weights for model",
        default=conf["model_path"])
    args = parser.parse_args()

    #getting input filepaths
    input_paths = util.get_paths(args.input_paths)
    #getting output_dir_path
    output_dir_path = args.output_dir_path
    #getting model path
    model_path = args.model_path

    if conf["max_n_preds"] is not None:
        random.shuffle(input_paths)
        input_paths = input_paths[:conf["max_n_preds"]]

    #creating base dir if needed
    if not os.path.isdir(output_dir_path):
        os.makedirs(output_dir_path)
    #creating preds dir
    preds_dir = mk_preds_dir(output_dir_path, "preds")

    #meta-model
    meta_model = model.MetaModel(**conf["meta_model_kwargs"])

    with tf.Session(graph=tf.Graph()) as sess:
        #loading model weights
        print("loading model from '{}'...".format(model_path),
              flush=True,
              end=" ")
        model.load(sess, model_path)
        meta_model.set_params_from_colls()
        print("done")

        #building functions
        load_fn = conf["load_fn"]
        pre_proc_fn = conf["pre_proc_fn"]
        save_y_pred_fn = conf["save_y_pred_fn"]
        #prediction function is a composition
        _pred_fn = lambda x: predict(x, meta_model.get_pred_fn(sess))
        pred_fn = (lambda x: hmirr_averaged_predict(x, _pred_fn)) \
            if conf["hmirr_averaged_predict"] else _pred_fn

        #iterating over images doing predictions
        pred_times = []
        #for path in input_paths:
        for path in input_paths:
            print("on file '{}'".format(path))

            #loading
            x = load_fn(path)
            orig_x_shape = x.shape[-2:]
            print('x shape, dtype:', x.shape, x.dtype)

            #pre-processing
            x = pre_proc_fn(x)
            print('[pre-proc] x shape, dtype:', x.shape, x.dtype)

            #predicting
            print("\tpredicting...", flush=True, end=" ")
            start_time = time.time()
            y_pred = pred_fn(x)
            pred_time = time.time() - start_time
            pred_times.append(pred_time)
            print("done. took {:.4f} seconds".format(pred_time), end=" | ")
            print("y_pred shape:", y_pred.shape)

            #saving
            y_pred_path = get_y_pred_path(path, preds_dir)
            save_y_pred_fn(y_pred_path, y_pred, orig_x_shape)
            print("\tsaved y_pred to '{}'".format(y_pred_path))

        print("\ndone prediction on {} files in {:.4f}s (avg {:.4f}s)".format(
            len(input_paths), sum(pred_times), get_mean(pred_times)))
        print("saved preds in '{}'".format(preds_dir))
Example #7
def main(args):

    with tf.Graph().as_default():

        with tf.Session() as sess:

            # Read the file containing the pairs used for testing
            #readStart = time.clock()
            names = util.read_names(os.path.expanduser(args.lfw_names))
            #print(names)
            # Get the paths for the corresponding images
            paths, actual_issame = util.get_paths(
                os.path.expanduser(args.lfw_dir), names, args.lfw_file_ext)
            #readEnd = time.clock()
            print(paths)

            # Load the model
            #loadStart = time.clock()
            print('Model directory: %s' % args.model_dir)

            meta_file, ckpt_file = util.get_model_filenames(
                os.path.expanduser(args.model_dir))
            #run_metadata = tf.RunMetadata()
            print('Metagraph file: %s' % meta_file)
            print('Checkpoint file: %s' % ckpt_file)
            util.load_model(args.model_dir, meta_file, ckpt_file)
            #loadEnd = time.clock()

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name(
                "input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name(
                "embeddings:0")
            phase_train_placeholder = tf.get_default_graph(
            ).get_tensor_by_name("phase_train:0")

            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            print('Image Size: %s' % str(image_size))
            print('Embedding Size: %s' % str(embedding_size))

            # Run forward pass to calculate embeddings
            print('Calculating embeddings')
            batch_size = args.lfw_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))

            runStart = time.clock()
            #run_metadata = tf.RunMetadata()
            for i in range(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = util.load_data(paths_batch, image_size)
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
                break
            runEnd = time.clock()

            #print('Size of image list of batch 100: %d'%sizeImages)
            #print('Path array size : %d'%sys.getsizeof(paths))
            #print('Time to extract path from file: %d'%(readEnd - readStart))
            #print('Time to load model from disk: %d'%(loadEnd - loadStart))
            print('Time to calculate embeddings: %d' % (runEnd - runStart))

            buildIndexStart = time.clock()
            # create an index of Euclidean distance
            p = PannsIndex(dimension=128, metric='euclidean')
            for i in range(0, 50):
                p.add_vector(emb_array[i][:])
            p.parallelize(True)
            p.build(40)

            buildIndexEnd = time.clock()

            results = p.query(
                emb_array[8][:],
                4)  #pick one face and find its 4 nearest neighbours
            print([
                paths[x[0]] for x in results
            ])  #the brackets make this a list comprehension rather than a generator
Example #8
import numpy as np
import lightgbm as lgb
import gc
import sys
import os

from util import get_paths, prtime, update_params
from sklearn.model_selection import train_test_split
from collections import OrderedDict
import psutil


GLOBAL_PARAMS_UPDATE = False            # If true, best parameters from the previous iteration are used in the next one
PREPROCESS_VERSION = 12                 # Version of preprocessed files
OUTPUT_VERSION = 1247                   # Version of output files to generate
IDIR, ODIR = get_paths()                # Folders with original and generated data
NUM_ATTEMPTS = 10                       # Number of attempts to optimize parameters and create better blended solution for each validation fold
RETRAIN = True                          # Retrain model on the whole training set (train+eval) after evaluation is done
VAL_SIZE = 0.3                          # Validation set ratio

def get_ratio(s1, s2):
    return (s1/s2).fillna(1).astype(np.float32)


#  Calculating static features, not using historical data

def get_static_features(df, metadata):
    df['Timestamp'] = pd.to_datetime(df.Timestamp)
    df['Doy'] = df.Timestamp.dt.dayofyear.astype(np.float32)
    df['Time'] = (df.Timestamp.dt.hour/24.0+df.Timestamp.dt.minute/(24.0*60.0)).astype(np.float32)
    df['DowTime'] = df['Dow']+df['Time']
Example #9
-------------------------------------------------------------------------------
This module takes in numpy arrays of the B-Tax final and intermediate
calculations and then puts them into Pandas Dataframes in a format suitable
for tabular representation in the web app.
Last updated: 8/2/2016.

"""
# Import packages
import os.path
import sys
import pandas as pd
import numpy as np
import cPickle as pickle
from util import get_paths, read_from_egg

globals().update(get_paths())


def CBO_compare(vars_by_asset):
    """Function to compare B-Tax output to CBO calcuations

        :param user_params: The user input for implementing reforms
        :type user_params: dictionary
        :returns: METR (by industry and asset) and METTR (by asset)
        :rtype: DataFrame
    """
    # read in CBO file
    CBO_data = pd.read_excel(os.path.join(_REF_DIR, 'effective_taxrates.xls'),
                             sheetname='Full detail',
                             header=1,
                             skiprows=0,
Example #10
"""
Analyzes the uses of the noscript tag.
"""
from util import get_paths, parse_csv_line, as_bool
from util import benchmark_columns as columns

from urllib.parse import urlparse
import os
import lxml.html

bm_file_path, _, noscript_dir_path, _ = get_paths()

# Category definitions.
cat_alt = "alternative_content"
cat_track = "tracking_metrics"
cat_other = "other"

def warn_tag(tag, url):
    """
    Warns about a tag not being recognized properly.
    """
    print("Unrecognized tag for {}: {} {}".format(url.hostname, tag.tag, tag.attrib))

def is_url_relative(url):
    """
    True if a URL is relative, False otherwise.
    """
    return url[0] == "/" and url[1] != "/"

def one_of_in(lst, val):
    """
Example #11
"""
Computes the median script execution time for every
website (doesn't work).
"""
from util import get_paths, parse_csv_line
from util import benchmark_columns as columns

import os, json

_, metrics_dir_path, _, _ = get_paths()
metrics_file_names = os.listdir(metrics_dir_path)

for file_name in metrics_file_names:
    # Skip metrics taken when JS was disabled.
    if "nojs" in file_name:
        continue

    file_path = os.path.join(metrics_dir_path, file_name)

    # List to load the JSON array with browser metrics into.
    metrics = []
    # List of script execution durations.
    script_timings = []

    with open(file_path, "r") as f:
        metrics = json.load(f)

    # Compute the difference in time between the current and
    # the previous sample taken.
    for i in range(1, len(metrics)):
        t = metrics[i]["ScriptDuration"] - metrics[i - 1]["ScriptDuration"]
Example #12
def train():
    #parsing possible command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir_path',
                        type=str,
                        nargs='?',
                        help='path to directory to save train data',
                        default=conf['output_dir_path'])
    parser.add_argument('--pre_trained_model_path',
                        type=str,
                        nargs='?',
                        help='path to pre-trained model',
                        default=conf['pre_trained_model_path'])
    parser.add_argument('--train_set',
                        type=str,
                        nargs='?',
                        help='path to csv list of train set paths',
                        default=conf['train_set'])
    parser.add_argument('--val_set',
                        type=str,
                        nargs='?',
                        help='path to csv list of validation set paths',
                        default=conf['val_set'])
    args = parser.parse_args()

    #getting output_dir_path
    output_dir_path = args.output_dir_path
    #getting pre_trained_model_path
    pre_trained_model_path = args.pre_trained_model_path
    #getting train_set
    train_set = util.get_paths(args.train_set)
    #getting val_set
    val_set = util.get_paths(args.val_set)

    out_dir = util.mk_model_dir(output_dir_path)
    print('created out dir \'{}\', populating...'.format(out_dir),
          flush=True,
          end=' ')
    populate_out_dir(out_dir, train_set, val_set)
    print('done.')

    #meta-model
    meta_model_kwargs = dict(conf['meta_model_kwargs'])
    if 'rand_seed' not in meta_model_kwargs:
        meta_model_kwargs['rand_seed'] = conf['rand_seed'] + 2
    meta_model = model.MetaModel(**meta_model_kwargs)

    #creating logging object
    log = util.Tee([
        sys.stdout,
        open(os.path.join(out_dir, 'etc', 'train-log', 'train.log'), 'w')
    ])

    #building graph
    if pre_trained_model_path is None:
        log.print('[info] building graph for the first time')
        graph = meta_model.build_graph()
    else:
        graph = tf.Graph()

    #tensorboard logging paths
    summ_dir = os.path.join(out_dir, 'etc', 'train-log', 'summaries')

    #training session
    with tf.Session(graph=graph) as sess:
        #if first time training, creates graph collections for model params
        #else, loads model weights and params from collections
        if pre_trained_model_path is None:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))
            meta_model.mk_params_colls(graph=graph)
        else:
            log.print('[info] loading graph/weights from \'{}\''.format(
                pre_trained_model_path))
            model.load(sess, pre_trained_model_path)
            meta_model.set_params_from_colls(graph=graph)

        #building functions
        #train function: computes loss
        _train_fn = meta_model.get_train_fn(sess)

        def train_fn(x, y_true):
            return _train_fn(
                x, y_true,
                {meta_model.params['learning_rate']: conf['learning_rate']})

        #test function: returns a dict with pairs metric_name: metric_value
        _test_fn = meta_model.get_test_fn(sess)

        def test_fn(x, y_true):
            metrics_values = _test_fn(x, y_true)
            return OrderedDict(
                zip(meta_model.params['metrics'].keys(), metrics_values))

        #save model function: given epoch and iter number, saves checkpoint
        def save_model_fn(epoch=None, it=None, name=None):
            if name is None:
                path = os.path.join(out_dir, 'self', 'ckpts',
                                    'epoch-{}_it-{}'.format(epoch, it))
            else:
                path = os.path.join(out_dir, 'self', 'ckpts',
                                    '{}'.format(name))
            model.save(sess, path, overwrite=True)
            print('    saved checkpoint to \'{}\''.format(path))

        #test
        if conf['use_tensorboard']:
            #tensorboard summary writers
            train_writer = tf.summary.FileWriter(os.path.join(
                summ_dir, 'train'),
                                                 graph=graph)
            val_writer = tf.summary.FileWriter(os.path.join(summ_dir, 'val'),
                                               graph=graph)
            #running tensorboard
            cmd = ['tensorboard', '--logdir={}'.format(summ_dir)]
            cmd.extend('--{}={}'.format(k, v) \
                for k, v in conf['tensorboard_params'].items())
            log.print('[info] running \'{}\''.format(' '.join(cmd)))
            proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)

            _log_fn = meta_model.get_summary_fn(sess)

            def log_fn(x, y_true, its, train=True):
                summ = _log_fn(x, y_true)
                if train:
                    train_writer.add_summary(summ, its)
                    if its % 10 == 0:
                        train_writer.flush()
                else:
                    val_writer.add_summary(summ, its)
                    if its % 10 == 0:
                        val_writer.flush()
        else:
            log_fn = None

        #main train loop
        print('calling train loop')
        try:
            trloop.train_loop(
                train_set=train_set,
                train_fn=train_fn,
                n_epochs=conf['n_epochs'],
                val_set=val_set,
                val_fn=test_fn,
                val_every_its=conf['val_every_its'],
                patience=conf['patience'],
                log_every_its=conf['log_every_its'],
                log_fn=log_fn,
                save_model_fn=save_model_fn,
                save_every_its=conf['save_every_its'],
                batch_gen_kw=conf['batch_gen_kw'],
                log_batch_gen_kw=conf['log_batch_gen_kw'],
                better_loss_tol=conf['better_loss_tol'],
                verbose=conf['verbose'],
                print_fn=log.print,
            )
        except KeyboardInterrupt:
            print('Keyboard Interrupt event.')
        finally:
            #closing tensorboard writers
            if conf['use_tensorboard']:
                train_writer.close()
                val_writer.close()

            #saving model on final state
            path = os.path.join(out_dir, 'self', 'ckpts', 'final')
            print('saving checkpoint to \'{}\'...'.format(path), flush=True)
            model.save(sess, path, overwrite=True)

    print('\ndone.', flush=True)
Example #13
def process_init(sender=None, conf=None, **kwargs):
    if sender.hostname == 'worker1@harshitpc':
        with tf.Graph().as_default():
            with tf.Session() as sess:
                # Read the file containing the pairs used for testing
                #readStart = time.clock()
                names = util.read_names(os.path.expanduser(lfw_names))
                #print(names)
                # Get the paths for the corresponding images
                paths, actual_issame = util.get_paths(
                    os.path.expanduser(lfw_dir), names, lfw_file_ext)
                #readEnd = time.clock()

                print("Done Initializing")
                # Load the model
                #loadStart = time.clock()
                print('Model directory: %s' % model_dir)

                meta_file, ckpt_file = util.get_model_filenames(
                    os.path.expanduser(model_dir))
                #run_metadata = tf.RunMetadata()
                print('Metagraph file: %s' % meta_file)
                print('Checkpoint file: %s' % ckpt_file)
                util.load_model(model_dir, meta_file, ckpt_file)
                #loadEnd = time.clock()

                # Get input and output tensors
                images_placeholder = tf.get_default_graph().get_tensor_by_name(
                    "input:0")
                embeddings = tf.get_default_graph().get_tensor_by_name(
                    "embeddings:0")
                phase_train_placeholder = tf.get_default_graph(
                ).get_tensor_by_name("phase_train:0")

                image_size = 160  # Warning. This was hardcoded. General should be ---> images_placeholder.get_shape()[1]
                embedding_size = 128  #Warning. This was hardcoded. General should be ---> embeddings.get_shape()[1]

                #print('Embedding Size: %s' %str(embedding_size))

                # Run forward pass to calculate embeddings
                print('Calculating embeddings')
                batch_size = lfw_batch_size
                nrof_images = len(paths)
                nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))

                #INCREDIBLY STUPID STUFF TO FOLLOW. WILL STRUCTURE THE CODE PROPERLY TO AVOID THIS LATER.
                global emb_array

                runStart = time.clock()
                #run_metadata = tf.RunMetadata()
                for i in range(nrof_batches):
                    start_index = i * batch_size
                    end_index = min((i + 1) * batch_size, nrof_images)
                    paths_batch = paths[start_index:end_index]
                    images = util.load_data(paths_batch, image_size)
                    feed_dict = {
                        images_placeholder: images,
                        phase_train_placeholder: False
                    }
                    emb_array[start_index:end_index, :] = sess.run(
                        embeddings, feed_dict=feed_dict)
                    break
Example #14
if argc < 3:
    print("usage: {} outputdir plot".format(sys.argv[0]))
    exit()

action = sys.argv[2]
plot_types = ["hist_load", "hist_domload", "hist_idle"]

if action not in plot_types:
    print("plot argument must be one of: {}".format(", ".join(plot_types)))
    exit()

# matplotlib is quite massive so we're only importing it now.
import matplotlib
import matplotlib.pyplot as plt

bm_file_path, _, _, _ = get_paths()
bm_results_file_path = append_to_filename(bm_file_path, "_results")

# Read results file into rows field.
rows = []

with open(bm_results_file_path) as f:
    # Skip CSV header.
    next(f)

    for line in f:
        line = parse_csv_line(line)
        line[1] = as_bool(line[1])
        line[2] = as_bool(line[2])

        for i in range(3, len(line)):
Example #15
# Parse arguments
args = parse_arguments(argv[1:])

# Generate dummy data
print('Loading data...')
data_dir = args.data_dir
_, _, X_test, y_test = ember.read_vectorized_features(data_dir,
                                                      scale=args.scale)
X_test = np.array(X_test)

X_test = X_test[y_test != -1]
y_test = y_test[y_test != -1]

model_dir = args.model_dir
path_dict = get_paths(model_dir)
json_file = open(path_dict['graph'], 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights(path_dict['model'])
with open(path_dict['scaler'], 'rb') as f:
    scaler = pkl.load(f)

X_test = scaler.transform(X_test)
X_test = np.expand_dims(X_test, axis=-1)
y_test = keras.utils.to_categorical(y_test, num_classes=2)

# ROC curve
y_pred = model.predict(X_test)
fpr, tpr, thresholds = roc_curve(np.argmax(y_test, axis=1),
Example #16
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import sys
import re
import pandas as pd
import util

# Get paths
paths = util.get_paths()
data_path = paths['data_path']
input_path = paths['input_path']
input_raw_path = paths['input_raw_path']
input_unzip_path = paths['input_unzip_path']


def merge_data():
    """
    ### Create sample data
    - Process the Kaggle Recruit Holdings data
    """
    # Unzip and load the data
    for fname in ['air_visit_data', 'air_store_info']:
        util.unzip(f'{input_raw_path}/{fname}.csv.zip', input_unzip_path)
    df_visit = pd.read_csv(f'{input_unzip_path}/air_visit_data.csv')
    df_store = pd.read_csv(f'{input_unzip_path}/air_store_info.csv')

    (df_visit.merge(df_store, on='air_store_id', how='left').assign(
        pref_name=lambda x: x['air_area_name'].str.split(' ').str.get(0).str.
        replace('Tōkyō-to', '東京都').str.replace('Ōsaka-fu', '大阪府').str.replace(
Example #17
"""
Removes redundant files in the subdirectories of the output
file that don't belong to any row present in the main table.
"""
from util import get_paths, parse_csv_line, try_remove
from util import benchmark_columns as columns

import os

bm_file_path, metrics_dir_path, noscript_dir_path, screenshots_dir_path = get_paths()

# List all subdirectory contents.
metrics_dir_list = os.listdir(metrics_dir_path)
noscript_dir_list = os.listdir(noscript_dir_path)
screenshots_dir_list = os.listdir(screenshots_dir_path)

with open(bm_file_path, "r") as f:
    # Skip CSV header.
    next(f)

    # Remove the files from the directory listings that are
    # referenced in the main table. In the end, the lists will
    # only contain files that need to be deleted.
    for line in f:
        data_file_name = parse_csv_line(line)[columns["dataFileName"]]

        try_remove(metrics_dir_list, data_file_name + ".json")
        try_remove(noscript_dir_list, data_file_name + ".html")
        try_remove(screenshots_dir_list, data_file_name + ".png")
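
# The excerpt ends inside the with-block above. A continuation that actually deletes
# the leftover, unreferenced files might look like the sketch below; it is an
# assumption based on the comments, not the original code.
for dir_path, leftovers in ((metrics_dir_path, metrics_dir_list),
                            (noscript_dir_path, noscript_dir_list),
                            (screenshots_dir_path, screenshots_dir_list)):
    for file_name in leftovers:
        redundant_path = os.path.join(dir_path, file_name)
        os.remove(redundant_path)
        print("removed redundant file:", redundant_path)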