def main(argv):
    """Export the van Hateren patch archives as MATLAB ``.mat`` files.

    For every patch size, archive ``0`` is written out as the test set and
    archive ``1`` as the training set, each after a pass through
    ``preprocess``. ``argv`` is accepted for the usual entry-point signature
    but is not read. Always returns 0.
    """
    # (source archive template, destination file template)
    exports = (
        ('data/vanhateren.{0}.0.npz', 'data/vanhateren.{0}.test.mat'),
        ('data/vanhateren.{0}.1.npz', 'data/vanhateren.{0}.train.mat'),
    )

    for patch_size in ['8x8', '16x16']:
        for source, target in exports:
            patches = preprocess(load(source.format(patch_size))['data'])
            savemat(target.format(patch_size), {'data': patches})

    return 0
def telemetry(sid, data):
    """Handle one telemetry message.

    With a non-empty payload the camera frame is decoded, the model predicts
    a steering angle, the speed controller supplies a throttle, and both are
    sent back through ``send_control``. The frame is additionally written to
    ``args.image_folder`` when that option is not empty. With an empty
    payload a ``manual`` event is emitted instead.
    """
    if not data:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Values reported by the car. The first two are overwritten below; the
    # lookups are kept so that a payload missing those keys still raises.
    steering_angle = data["steering_angle"]
    throttle = data["throttle"]
    speed = data["speed"]

    # The current image from the center camera of the car (base64 encoded).
    frame = Image.open(BytesIO(base64.b64decode(data["image"])))
    # Reversing the last axis converts the image from RGB to BGR.
    pixels = preprocess(np.asarray(frame)[:, :, ::-1])

    prediction = model.predict(pixels[None, :, :, :], batch_size=1)
    steering_angle = float(prediction)
    throttle = controller.update(float(speed))
    print(steering_angle, throttle)
    send_control(steering_angle, throttle)

    # save frame
    if args.image_folder != '':
        # Drop the last three digits of the microseconds -> millisecond stamp.
        stamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
        frame.save('{}.jpg'.format(os.path.join(args.image_folder, stamp)))
def visualize_attention(input_model, image, steering_angle_gt, layer_name):
    """Build a gradient-weighted activation map for a steering prediction.

    ``input_model`` is wrapped with a Lambda layer applying ``mse_loss``
    against ``steering_angle_gt``; the gradient of that (summed) loss with
    respect to the output of ``layer_name`` weights the layer's channels.

    Returns a tuple ``(overlay, heatmap)``: ``overlay`` is a uint8 image
    resized to 360x360 combining the JET-coloured map with the cropped input,
    ``heatmap`` is the map divided by its own maximum.
    """
    wrapped = Sequential()
    wrapped.add(input_model)
    wrapped.add(Lambda(lambda x: mse_loss(x, steering_angle_gt),
                       output_shape=mse_loss_output_shape))

    loss = K.sum(wrapped.layers[-1].output)
    conv_output = input_model.get_layer(layer_name).output
    grads = normalize(K.gradients(loss, conv_output)[0])
    gradient_function = K.function([wrapped.layers[0].input],
                                   [conv_output, grads])

    batch = preprocess(image)[None, :, :, :]
    activations, gradients = gradient_function([batch])
    # Strip the batch axis from both results.
    activations = activations[0, :]
    gradients = gradients[0, :, :, :]

    # One weight per channel: the mean gradient over the first two axes.
    channel_weights = np.mean(gradients, axis=(0, 1))
    vismap = np.ones(activations.shape[0:2], dtype=np.float32)
    for channel, weight in enumerate(channel_weights):
        vismap += weight * activations[:, :, channel]

    # Crop the input and resize the map to the crop; cv2.resize takes
    # (width, height).
    cropped = image[50:-20, 30:-30, :]
    rows, cols = cropped.shape[0:2]
    vismap = cv2.resize(vismap, (cols, rows))
    vismap = np.exp(vismap) - 1
    heatmap = vismap / np.max(vismap)

    overlay = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
    overlay = np.float32(overlay) + np.float32(cropped)
    overlay = 255 * overlay / np.max(overlay)
    overlay = cv2.resize(overlay, (360, 360))
    return np.uint8(overlay), heatmap
def upload_page():
    """Serve the upload form and run OCR on a submitted image.

    GET (and any other non-POST method) renders the empty form. POST expects
    a ``file`` part: the image is stored as ``input.jpg``, passed through
    ``preprocess`` and ``ocr``, and the extracted text is rendered together
    with the image source path.

    Every path returns a rendered template. Previously a POST carrying a
    disallowed file (and any method other than GET/POST) fell off the end of
    the function and returned ``None``, which Flask rejects as an invalid
    response.
    """
    if request.method != 'POST':
        return render_template('upload.html')

    # check if there is a file in the request
    if 'file' not in request.files:
        return render_template('upload.html', msg='No file selected')
    file = request.files['file']

    # if no file is selected
    if file.filename == '':
        return render_template('upload.html', msg='No file selected')

    if not file or not allowed_file(file.filename):
        return render_template('upload.html', msg='File type not allowed')

    img = Image.open(file)
    # JPEG cannot store alpha or palette modes; convert those first so that
    # e.g. an RGBA PNG upload does not fail in save().
    if img.mode not in ('L', 'RGB', 'CMYK'):
        img = img.convert('RGB')
    img.save('input.jpg', 'JPEG')

    # call the preprocess function on it
    optimizedImage = preprocess('input.jpg')

    # call the OCR function on it
    extracted_text = ocr(optimizedImage)

    # extract the text and display it
    return render_template('upload.html',
                           msg='Successfully processed',
                           extracted_text=extracted_text,
                           img_src=UPLOAD_FOLDER + file.filename)
def main(argv):
    """Train a MoGSM on ``./data/vanhateren4x4.npz`` and Gaussianize its split.

    The preprocessed data is used to train the mixture, is then split into
    one batch per mixture component, and each batch is passed through a
    ``RadialGaussianization`` built from its component. The mixture and the
    transformed batches are stored in the experiment and saved. ``argv`` is
    not read. Always returns 0.
    """
    experiment = Experiment()

    # load and preprocess data samples
    data = preprocess(load('./data/vanhateren4x4.npz')['data'])

    # train mixture of Gaussian scale mixtures
    mixture = MoGSM(data.shape[0], 8, 4)
    mixture.train(data, num_epochs=100)

    # split data
    batches = mixture.split(data)

    # Gaussianize data, one component at a time
    for k in range(len(mixture)):
        gaussianize = RadialGaussianization(mixture[k], symmetric=False)
        batches[k] = gaussianize(batches[k])

    # store results
    experiment.results['mixture'] = mixture
    experiment.results['batches'] = batches
    experiment.save('results/experiment01/experiment01a.{0}.{1}.xpck')

    return 0
def __init__(self, is_training=False, num_classes=19, input_size=None):
    """Create the input placeholder, preprocess it and build the parent net.

    Args:
        is_training: forwarded to the parent constructor.
        num_classes: forwarded to the parent constructor.
        input_size: size handed to ``preprocess``; defaults to
            ``[1024, 2048]``. A fresh list is created per instance, because
            the value is stored on ``self.input_size`` and a shared default
            list would leak mutations between instances.
    """
    if input_size is None:
        input_size = [1024, 2048]

    self.input_size = input_size
    self.x = tf.placeholder(dtype=tf.float32, shape=[None, None, 3])
    self.img_tf, self.shape = preprocess(self.x, self.input_size, 'icnet')

    super().__init__({'data': self.img_tf}, num_classes, is_training)
def dreaming(agent, cameras, lidars, occupancies, actions, obstype, basedir):
    """Run 'dreaming' in latent space and store real vs. reconstructed data.

    The lidar, action and occupancy sequences are given a leading axis and
    preprocessed; the camera images are attached afterwards without
    preprocessing. The agent encodes the data, observes it through its
    dynamics model, decodes the resulting features and the outcome is handed
    to ``save_dreams``.
    """
    def _with_leading_axis(sequence):
        return np.stack(np.expand_dims(sequence, 0))

    data = {
        'lidar': _with_leading_axis(lidars),
        'action': _with_leading_axis(actions),
        'lidar_occupancy': _with_leading_axis(occupancies),
    }
    # note: this is ugly but since we don't use reward clipping, config can
    # be passed as None
    data = tools.preprocess(data, config=None)
    # hack: don't preprocess image
    data['image'] = _with_leading_axis(cameras)

    embed = agent._encode(data)
    post, _prior = agent._dynamics.observe(embed, data['action'])
    image_pred = agent._decode(agent._dynamics.get_feat(post))

    save_dreams(basedir,
                agent,
                data,
                embed,
                image_pred,
                obs_type=obstype,
                summary_length=len(lidars) - 1)
def load_image(X_sample, angle, is_training):
    """Load and preprocess one image of a sample and return it with its angle.

    Column 0 of ``X_sample`` is used most of the time. During training, with
    probability of roughly 0.2, column 1 or 2 is picked at random instead and
    the angle is shifted by +0.2 (column 1) or -0.2 (column 2).

    Returns ``(image, angle)`` with ``angle`` converted to ``float``.
    """
    column = 0  # chose center images most of the time
    if is_training and np.random.random() > 0.8:
        # randomly choose left or right image
        column = np.random.randint(1, 3)

    name = './data/IMG/' + X_sample[column].strip().split('/')[-1]

    angle = float(angle)
    if column == 1:
        angle = angle + 0.2
    elif column == 2:
        angle = angle - 0.2

    image = preprocess(cv2.imread(name))
    return image, angle
def _construct_and_fill_model(self):
    """Build the inference graph, open a session and restore the weights.

    The graph takes a float32 placeholder of shape ``input_size + [3]``
    (stored as ``self.image_tensor``), preprocesses it and feeds it to
    ``self.model_class``. When the ``USE_FLIP`` config entry is set, a second
    network with reused variables predicts on the left-right flipped input
    and its un-flipped output is added to the first prediction. The result is
    resized to ``input_size``, reduced with argmax over axis 3 and converted
    by ``decode_to_n_channels`` into ``self.pred_holder``.

    Side effects: sets ``self.image_tensor``, ``self.pred_holder`` and
    ``self.sess``; restores all global variables from ``model.ckpt-0`` in
    ``sly.TaskPaths.MODEL_DIR``.
    """
    super()._construct_and_fill_model()
    sly.env.remap_gpu_devices([self._config[GPU_DEVICE]])

    # Number of classes = largest key of the output class mapping, plus one.
    n_cls = (max(self.out_class_mapping.keys()) + 1)

    self.image_tensor = tf.placeholder("float32", list(self.input_size) + [3])
    img = preprocess(self.image_tensor, self.input_size[0], self.input_size[1])
    net = self.model_class({'data': img}, is_training=False, num_classes=n_cls)

    # Predictions.
    raw_output = net.layers['conv6']

    if self._config[USE_FLIP]:
        # reuse=True makes the second network share the first one's variables.
        with tf.variable_scope('', reuse=True):
            # NOTE(review): the flipped branch starts from the raw
            # placeholder, not from the preprocessed `img` fed to the first
            # network - confirm this is intended.
            flipped_img = tf.image.flip_left_right(
                tf.squeeze(self.image_tensor))
            flipped_img = tf.expand_dims(flipped_img, dim=0)
            net_flip = self.model_class({'data': flipped_img},
                                        is_training=False,
                                        num_classes=n_cls)
        # Flip the second prediction back before summing the two.
        flipped_output = tf.image.flip_left_right(
            tf.squeeze(net_flip.layers['conv6']))
        flipped_output = tf.expand_dims(flipped_output, dim=0)
        raw_output = tf.add_n([raw_output, flipped_output])

    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             size=self.input_size,
                                             align_corners=True)
    raw_output_up = tf.image.crop_to_bounding_box(raw_output_up, 0, 0,
                                                  self.input_size[0],
                                                  self.input_size[1])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    self.pred_holder = decode_to_n_channels(raw_output_up, self.input_size,
                                            n_cls)

    # Init tf Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    self.sess.run(init)

    # Restore every global variable from the checkpoint.
    restore_var = tf.global_variables()
    loader = tf.train.Saver(var_list=restore_var)
    loader.restore(self.sess,
                   os.path.join(sly.TaskPaths.MODEL_DIR, 'model.ckpt-0'))
def policy(self, obs, state, training):
    """Choose an action for ``obs`` and advance the recurrent state.

    ``state`` is either ``None`` (start from the dynamics model's initial
    latent and an all-zero action) or a ``(latent, action)`` pair from the
    previous call. The actor's distribution is sampled when ``training`` is
    true and its mode is taken otherwise; the result is passed through
    ``self._exploration``.

    Returns ``(action, state)`` where ``state`` is the new
    ``(latent, action)`` pair.
    """
    if state is not None:
        latent, action = state
    else:
        batch = len(obs[self._c.obs_type])
        latent = self._dynamics.initial(batch)
        action = tf.zeros((batch, self._actdim), self._float)

    embed = self._encode(tools.preprocess(obs, self._c))
    latent, _ = self._dynamics.obs_step(latent, action, embed)
    feat = self._dynamics.get_feat(latent)

    distribution = self._actor(feat)
    action = distribution.sample() if training else distribution.mode()
    action = self._exploration(action, training)

    return action, (latent, action)
def _construct_and_fill_model(self):
    """Build the PSPNet inference graph and load the trained weights.

    The input placeholder accepts an image of any height/width with three
    channels. It is resized to ``self.input_size_wh``, preprocessed and fed
    to ``PSPNet101`` when ``train_config['model'] == 'pspnet101'``, otherwise
    to ``PSPNet50``. The ``conv6`` output is resized back to the placeholder's
    height and width.

    Side effects: sets ``self.device_ids``, ``self.input_images`` (the
    placeholder), ``self.predictions`` (the upsampled output) and
    ``self.sess``; restores all global variables from ``model.ckpt`` in the
    task's model directory.
    """
    model_dir = sly.TaskPaths(determine_in_project=False).model_dir
    self.device_ids = sly.remap_gpu_devices([self.source_gpu_device])
    logger.info('Will create model.')

    with tf.get_default_graph().as_default():
        img_np = tf.placeholder(tf.float32, shape=(None, None, 3))
        img_shape = tf.shape(img_np)

        # Resize needs a 4-D tensor: add the batch axis, resize, drop it.
        w, h = self.input_size_wh
        img_np_4d = tf.expand_dims(img_np, axis=0)
        image_rs_4d = tf.image.resize_bilinear(img_np_4d, (h, w),
                                               align_corners=True)
        image_rs = tf.squeeze(image_rs_4d, axis=0)
        img = preprocess(image_rs, h, w)

        # The per-model `allign_corners` flag that used to be set here was
        # never read, so it has been removed.
        # NOTE(review): the upsampling below uses align_corners=False for
        # both models - confirm that is what was intended.
        if ('model' in self.train_config
                and self.train_config['model'] == 'pspnet101'):
            PSPNet = PSPNet101
        else:
            PSPNet = PSPNet50

        net = PSPNet({'data': img},
                     is_training=False,
                     num_classes=len(self.train_classes))
        raw_output = net.layers['conv6']  # 4d

        # Predictions.
        raw_output_up = tf.image.resize_bilinear(
            raw_output,
            size=[img_shape[0], img_shape[1]],
            align_corners=False)

        logger.info('Will load weights from trained model.')

        # Init tf Session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()
        sess.run(init)

        loader = tf.train.Saver(var_list=tf.global_variables())
        last_checkpoint = osp.join(model_dir, 'model.ckpt')
        loader.restore(sess, last_checkpoint)

        self.input_images = img_np
        self.predictions = raw_output_up
        self.sess = sess

    logger.info('Model has been created & weights are loaded.')
def simulation(start_day=4, start_hour=8, duration_in_days=14 + 7 / 24, seed=3):
    """Run the full simulation, write the event log to CSV and return it.

    Args:
        start_day: forwarded to ``Timeline``.
        start_hour: forwarded to ``Timeline``.
        duration_in_days: forwarded to ``Timeline``.
        seed: seed for NumPy's global random generator; applied after
            ``preprocess()`` has run.

    Returns:
        The log as a ``pandas.DataFrame`` with columns ``uId``, ``storeId``,
        ``action`` and ``eventTime``, sorted by ``eventTime``. The same frame
        is written to ``simulation_results.csv``. (The function previously
        returned ``None`` although it was documented as returning the log.)
    """
    (lambdas_vec_weekday, lambdas_vec_weekend, behaviour_freqs,
     user_behaviour_df, df, prior_a, prior_b, action_time_vec) = preprocess()

    np.random.seed(seed)

    timeline = Timeline(lambdas_vec_weekend,
                        lambdas_vec_weekday,
                        start_day=start_day,
                        duration_in_days=duration_in_days,
                        start_hour=start_hour)
    timeline.simulate_server_init_times()

    # One simulated user per server init time; their logs are concatenated
    # in creation order and sorted by time afterwards.
    sim_log = []
    for user_id, init_time in enumerate(timeline.server_init_times):
        user = User(user_id, init_time, behaviour_freqs, user_behaviour_df,
                    df, prior_a, prior_b, action_time_vec)
        user.interact()
        sim_log += user.log

    sim_log_df = pd.DataFrame(
        sim_log,
        columns=['uId', 'storeId', 'action', 'eventTime'],
    ).sort_values('eventTime')
    sim_log_df.to_csv('simulation_results.csv')
    return sim_log_df
def _construct_and_fill_model(self):
    """Build the ICNet_BN inference graph and load the trained weights.

    The input placeholder accepts an image of any height/width with three
    channels; it is resized to ``self.input_size_wh``, preprocessed and fed
    to ``ICNet_BN``. The ``conv6_cls`` output is resized back to the
    placeholder's height and width.

    Side effects: sets ``self.input_images`` (the placeholder),
    ``self.predictions`` (the upsampled output) and ``self.sess``; restores
    all global variables from ``model.ckpt`` in the helper's model directory.
    """
    logger.info('Will create model.')

    with tf.get_default_graph().as_default():
        placeholder = tf.placeholder(tf.float32, shape=(None, None, 3))
        original_shape = tf.shape(placeholder)
        w, h = self.input_size_wh

        # Add a batch axis for the resize, then drop it again.
        resized = tf.squeeze(
            tf.image.resize_bilinear(tf.expand_dims(placeholder, axis=0),
                                     (h, w),
                                     align_corners=True),
            axis=0)
        net_input = preprocess(resized, h, w)

        net = ICNet_BN(
            {'data': net_input},
            is_training=False,
            num_classes=len(self.train_classes),
            filter_scale=self.train_config['settings']['filter_scale'])
        logits = net.layers['conv6_cls']  # 4d

        # Predictions.
        # !!!!!!!!! align_corners=False
        upsampled = tf.image.resize_bilinear(
            logits,
            size=[original_shape[0], original_shape[1]],
            align_corners=False)

        logger.info('Will load weights from trained model.')

        # Init tf Session
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session = tf.Session(config=session_config)
        session.run(tf.global_variables_initializer())

        checkpoint_path = osp.join(self.helper.paths.model_dir, 'model.ckpt')
        tf.train.Saver(var_list=tf.global_variables()).restore(
            session, checkpoint_path)

        self.input_images = placeholder
        self.predictions = upsampled
        self.sess = session

    logger.info('Model has been created & weights are loaded.')
def main(argv): if len(argv) < 2: print 'Usage:', argv[0], '<experiment>', '[data_points]' return 0 experiment = Experiment() # range of data points evaluated if len(argv) < 3: fr, to = 0, 1000 else: if '-' in argv[2]: fr, to = argv[2].split('-') fr, to = int(fr), int(to) else: fr, to = 0, int(argv[2]) indices = range(fr, to) # load experiment with trained model results = Experiment(argv[1]) # generate test data data = load('data/vanhateren.{0}.0.npz'.format(results['parameters'][0]))['data'] data = preprocess(data, shuffle=False) # compute importance weights estimating likelihoods ais_weights = results['model'].loglikelihood(data[:, indices], num_samples=NUM_AIS_SAMPLES, sampling_method=('ais', {'num_steps': NUM_AIS_STEPS}), return_all=True) # average log-likelihood in [bit/pixel] loglik = mean(logmeanexp(ais_weights, 0)) / log(2.) / data.shape[0] sem = std(logmeanexp(ais_weights, 0), ddof=1) / log(2.) / data.shape[0] / sqrt(ais_weights.shape[1]) # store save results experiment['indices'] = indices experiment['ais_weights'] = ais_weights experiment['loglik'] = loglik experiment['sem'] = sem experiment['fixed'] = True experiment.save(argv[1][:-4] + '{0}-{1}.xpck'.format(fr, to)) return 0
def main(argv):
    """Train a MoGSM on ``./data/vanhateren8x8.npz`` and evaluate it.

    The first 100000 columns of the preprocessed data are used for training;
    the remaining columns are passed to ``mixture.evaluate``. The mixture and
    the evaluation result are stored in the experiment and saved. ``argv`` is
    not read. Always returns 0.
    """
    experiment = Experiment()

    # load and preprocess data
    data = preprocess(load('./data/vanhateren8x8.npz')['data'])

    # columns before this index train the model, the rest evaluate it
    num_train = 100000

    # train a mixture of Gaussian scale mixtures
    mixture = MoGSM(data.shape[0], 8, 4)
    mixture.train(data[:, :num_train], num_epochs=100)

    # evaluate on the columns that were not used for training
    avglogloss = mixture.evaluate(data[:, num_train:])

    # store results
    experiment.results['mixture'] = mixture
    experiment.results['avglogloss'] = avglogloss
    experiment.save('results/experiment01/experiment01b.{0}.{1}.xpck')

    return 0
def main(argv):
    """Fit a MoGSM to ``./data/vanhateren8x8.npz`` and score the held-out part.

    Training uses columns ``[:100000]`` of the preprocessed data and
    ``mixture.evaluate`` receives columns ``[100000:]``. Both the mixture and
    the score are written to the experiment's results before it is saved.
    ``argv`` is unused; the return value is always 0.
    """
    experiment = Experiment()

    # load and preprocess data
    samples = load('./data/vanhateren8x8.npz')['data']
    samples = preprocess(samples)

    split = 100000
    train_part = samples[:, :split]
    held_out_part = samples[:, split:]

    # train a mixture of Gaussian scale mixtures
    mixture = MoGSM(samples.shape[0], 8, 4)
    mixture.train(train_part, num_epochs=100)

    # store results (the score is computed on the held-out columns)
    experiment.results['mixture'] = mixture
    experiment.results['avglogloss'] = mixture.evaluate(held_out_part)
    experiment.save('results/experiment01/experiment01b.{0}.{1}.xpck')

    return 0
def main():
    """Read the data, build the ``Mnist`` model and train it.

    The training frame is split by ``preprocess`` into a training part and a
    cross-validation part; the model is built with the module-level batch
    size and learning rate and trained for 1000 in a fresh session (the unit
    of that count is defined by ``Mnist.train``).
    """
    train_df, _test_df = read_data(PATH, TRAIN_SET, TEST_SET)

    train_split, cv_split = preprocess(train_df,
                                       validation_size=VALIDATION_SIZE)
    train_images, train_labels = train_split
    cv_images, cv_labels = cv_split

    model = Mnist(TrainBatcher(train_images, train_labels),
                  cv_data={'features': cv_images, 'labels': cv_labels},
                  batch_size=BATCH_SIZE,
                  learning_rate=LEARNING_RATE,
                  kernel=5,
                  filters=[32, 64],
                  dropout=0.5,
                  fc=1024)
    model.build_graph()

    # Training (the saver is created only after the graph has been built)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        model.train(sess, 1000, saver)
def main(argv):
    """Fit a MoGSM to ``./data/vanhateren4x4.npz`` and Gaussianize each part.

    After training on all of the preprocessed data, the mixture splits the
    data into one batch per component and every batch is transformed by a
    ``RadialGaussianization`` constructed from the matching component. The
    mixture and the transformed batches are saved with the experiment.
    ``argv`` is unused; the return value is always 0.
    """
    experiment = Experiment()

    # load and preprocess data samples
    samples = load('./data/vanhateren4x4.npz')['data']
    samples = preprocess(samples)

    # train mixture of Gaussian scale mixtures
    mixture = MoGSM(samples.shape[0], 8, 4)
    mixture.train(samples, num_epochs=100)

    # split data
    batches = mixture.split(samples)

    # Gaussianize data
    component = 0
    while component < len(mixture):
        transform = RadialGaussianization(mixture[component], symmetric=False)
        batches[component] = transform(batches[component])
        component += 1

    # store results
    experiment.results['mixture'] = mixture
    experiment.results['batches'] = batches
    experiment.save('results/experiment01/experiment01a.{0}.{1}.xpck')

    return 0
def main():
    """Entry point: export, test or train the model, depending on ``a.mode``.

    Reads its configuration from the module-level options object ``a``:

    * seeds TensorFlow, NumPy and ``random`` (drawing a seed if none is set)
      and creates ``a.output_dir``;
    * in ``test``/``export`` mode, requires ``a.checkpoint`` and reloads a
      few options from the checkpoint's ``options.json``;
    * writes the effective options to ``options.json`` in the output dir;
    * ``export`` mode builds a base64-in/base64-out generator graph, restores
      the checkpoint, writes ``export.meta`` plus the weights and returns;
    * otherwise builds the model, image summaries and encoders, then either
      runs over the test data once (``test``) or runs the training loop.

    Raises:
        Exception: wrong TensorFlow major version, missing checkpoint in
            test/export mode, export requested with ``lab_colorization``,
            invalid output filetype, or invalid ``which_direction``.
    """
    if tf.__version__.split('.')[0] != "1":
        raise Exception("Tensorflow version 1 required")

    if a.seed is None:
        a.seed = random.randint(0, 2**31 - 1)

    # Seed all three random sources with the same value.
    tf.set_random_seed(a.seed)
    np.random.seed(a.seed)
    random.seed(a.seed)

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    #%% test
    if a.mode == "test" or a.mode == "export":
        if a.checkpoint is None:
            raise Exception("checkpoint required for test mode")

        # load some options from the checkpoint
        options = {"which_direction", "ngf", "ndf", "lab_colorization"}
        with open(os.path.join(a.checkpoint, "options.json")) as f:
            for key, val in json.loads(f.read()).items():
                if key in options:
                    print("loaded", key, "=", val)
                    setattr(a, key, val)
        # disable these features in test mode
        a.scale_size = CROP_SIZE
        a.flip = False

    #%%
    # Print every option, then persist them next to the outputs.
    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    #%% export the meta
    if a.mode == "export":
        # export the generator to a meta graph that can be imported later for standalone generation
        if a.lab_colorization:
            raise Exception("export not supported for lab_colorization")

        # The exported graph takes one base64 string holding a PNG image.
        input = tf.placeholder(tf.string, shape=[1])
        input_data = tf.decode_base64(input[0])
        input_image = tf.image.decode_png(input_data)

        # remove alpha channel if present
        # (tf.cond evaluates the first lambda when the predicate is true,
        # otherwise the second)
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 4),
                              lambda: input_image[:, :, :3],
                              lambda: input_image)
        # convert grayscale to RGB
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 1),
                              lambda: tf.image.grayscale_to_rgb(input_image),
                              lambda: input_image)

        input_image = tf.image.convert_image_dtype(input_image,
                                                   dtype=tf.float32)
        input_image.set_shape([CROP_SIZE, CROP_SIZE, 3])
        batch_input = tf.expand_dims(input_image, axis=0)

        with tf.variable_scope("generator"):
            batch_output = tools.deprocess(
                create_generator(tools.preprocess(batch_input), 3))

        # Take the single image of the batch and encode it.
        output_image = tf.image.convert_image_dtype(batch_output,
                                                    dtype=tf.uint8)[0]
        if a.output_filetype == "png":
            output_data = tf.image.encode_png(output_image)
        elif a.output_filetype == "jpeg":
            output_data = tf.image.encode_jpeg(output_image, quality=80)
        else:
            raise Exception("invalid filetype")
        output = tf.convert_to_tensor([tf.encode_base64(output_data)])

        # Record the tensor names of the graph's inputs and outputs as JSON
        # in graph collections; "key" is passed through via tf.identity.
        key = tf.placeholder(tf.string, shape=[1])
        inputs = {"key": key.name, "input": input.name}
        tf.add_to_collection("inputs", json.dumps(inputs))
        outputs = {
            "key": tf.identity(key).name,
            "output": output.name,
        }
        tf.add_to_collection("outputs", json.dumps(outputs))

        init_op = tf.global_variables_initializer()
        restore_saver = tf.train.Saver()
        export_saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            restore_saver.restore(sess, checkpoint)
            print("exporting model")
            # The meta graph is written explicitly, so save() skips it.
            export_saver.export_meta_graph(
                filename=os.path.join(a.output_dir, "export.meta"))
            export_saver.save(sess,
                              os.path.join(a.output_dir, "export"),
                              write_meta_graph=False)

        return

    #%%
    examples = load_examples(a)
    print("examples count = %d" % examples.count)

    # inputs and targets are [batch_size, height, width, channels]
    model = create_model(examples.inputs, examples.targets, a)

    # undo colorization splitting on images that we use for display/output
    if a.lab_colorization:
        if a.which_direction == "AtoB":
            # inputs is brightness, this will be handled fine as a grayscale image
            # need to augment targets and outputs with brightness
            targets = tools.augment(examples.targets, examples.inputs)
            outputs = tools.augment(model.outputs, examples.inputs)
            # inputs can be deprocessed normally and handled as if they are single channel
            # grayscale images
            inputs = tools.deprocess(examples.inputs)
        elif a.which_direction == "BtoA":
            # inputs will be color channels only, get brightness from targets
            inputs = tools.augment(examples.inputs, examples.targets)
            targets = tools.deprocess(examples.targets)
            outputs = tools.deprocess(model.outputs)
        else:
            raise Exception("invalid direction")
    else:
        inputs = tools.deprocess(examples.inputs)
        targets = tools.deprocess(examples.targets)
        outputs = tools.deprocess(model.outputs)

    # reverse any processing on images so they can be written to disk or displayed to user
    with tf.name_scope("convert_inputs"):
        converted_inputs = convert(inputs)

    with tf.name_scope("convert_targets"):
        converted_targets = convert(targets)

    with tf.name_scope("convert_outputs"):
        converted_outputs = convert(outputs)

    # PNG-encode every image of the batch; these are the tensors fetched
    # whenever images are written to disk.
    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths": examples.paths,
            "inputs": tf.map_fn(tf.image.encode_png,
                                converted_inputs,
                                dtype=tf.string,
                                name="input_pngs"),
            "targets": tf.map_fn(tf.image.encode_png,
                                 converted_targets,
                                 dtype=tf.string,
                                 name="target_pngs"),
            "outputs": tf.map_fn(tf.image.encode_png,
                                 converted_outputs,
                                 dtype=tf.string,
                                 name="output_pngs"),
        }

    # summaries
    with tf.name_scope("inputs_summary"):
        tf.summary.image("inputs", converted_inputs)

    with tf.name_scope("targets_summary"):
        tf.summary.image("targets", converted_targets)

    with tf.name_scope("outputs_summary"):
        tf.summary.image("outputs", converted_outputs)

    with tf.name_scope("predict_real_summary"):
        tf.summary.image(
            "predict_real",
            tf.image.convert_image_dtype(model.predict_real, dtype=tf.uint8))

    with tf.name_scope("predict_fake_summary"):
        tf.summary.image(
            "predict_fake",
            tf.image.convert_image_dtype(model.predict_fake, dtype=tf.uint8))

    tf.summary.scalar("discriminator_loss", model.discrim_loss)
    tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN)
    tf.summary.scalar("generator_loss_L1", model.gen_loss_L1)

    # Histograms of every trainable variable and of every gradient.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    # Total number of elements over all trainable variables.
    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=1)

    # The supervisor only gets a log directory when traces or summaries are
    # enabled; saving is done manually below (saver=None,
    # save_summaries_secs=0).
    logdir = a.output_dir if (a.trace_freq > 0
                              or a.summary_freq > 0) else None
    sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print("parameter_count =", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        # a.max_steps, when given, takes precedence over a.max_epochs.
        max_steps = 2**32
        if a.max_epochs is not None:
            max_steps = examples.steps_per_epoch * a.max_epochs
        if a.max_steps is not None:
            max_steps = a.max_steps

        if a.mode == "test":
            # testing
            # at most, process the test data once
            max_steps = min(examples.steps_per_epoch, max_steps)
            for step in range(max_steps):
                results = sess.run(display_fetches)
                filesets = save_images(results)
                for i, f in enumerate(filesets):
                    print("evaluated image", f["name"])
                index_path = append_index(filesets)

            print("wrote index at", index_path)
        else:
            # training
            start = time.time()

            for step in range(max_steps):

                def should(freq):
                    # True every `freq` steps and on the final step;
                    # a frequency of 0 (or less) disables the action.
                    return freq > 0 and ((step + 1) % freq == 0
                                         or step == max_steps - 1)

                options = None
                run_metadata = None
                if should(a.trace_freq):
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                # Always run a training step; add the optional fetches that
                # are due at this step.
                fetches = {
                    "train": model.train,
                    "global_step": sv.global_step,
                }

                if should(a.progress_freq):
                    fetches["discrim_loss"] = model.discrim_loss
                    fetches["gen_loss_GAN"] = model.gen_loss_GAN
                    fetches["gen_loss_L1"] = model.gen_loss_L1

                if should(a.summary_freq):
                    fetches["summary"] = sv.summary_op

                if should(a.display_freq):
                    fetches["display"] = display_fetches

                results = sess.run(fetches,
                                   options=options,
                                   run_metadata=run_metadata)

                if should(a.summary_freq):
                    print("recording summary")
                    sv.summary_writer.add_summary(results["summary"],
                                                  results["global_step"])

                if should(a.display_freq):
                    print("saving display images")
                    filesets = save_images(results["display"],
                                           step=results["global_step"])
                    append_index(filesets, step=True)

                if should(a.trace_freq):
                    print("recording trace")
                    sv.summary_writer.add_run_metadata(
                        run_metadata, "step_%d" % results["global_step"])

                if should(a.progress_freq):
                    # global_step will have the correct step count if we resume from a checkpoint
                    train_epoch = math.ceil(results["global_step"] /
                                            examples.steps_per_epoch)
                    train_step = (results["global_step"] -
                                  1) % examples.steps_per_epoch + 1
                    # Throughput is measured over this run only (from
                    # `start`), in images per second.
                    rate = (step + 1) * a.batch_size / (time.time() - start)
                    remaining = (max_steps - step) * a.batch_size / rate
                    print(
                        "progress  epoch %d  step %d  image/sec %0.1f  remaining %dm"
                        % (train_epoch, train_step, rate, remaining / 60))
                    print("discrim_loss", results["discrim_loss"])
                    print("gen_loss_GAN", results["gen_loss_GAN"])
                    print("gen_loss_L1", results["gen_loss_L1"])

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess,
                               os.path.join(a.output_dir, "model"),
                               global_step=sv.global_step)

                if sv.should_stop():
                    break
'''Not used but kept for future reference

Reads the most recent ADC rows, preprocesses them and stores hourly means.
'''
import pandas as pd
from db_sqlite import conn
from tools import preprocess

if __name__ == '__main__':
    # Latest 50000 rows of the `adc` table, newest first.
    samples = preprocess(
        pd.read_sql_query('SELECT * FROM adc ORDER BY time DESC LIMIT 50000',
                          conn))
    print(samples.head(1000))

    # Hourly means, written to the `hour` table (replacing its contents).
    hourly = samples.resample('H').mean()
    print(hourly)
    hourly.to_sql('hour', conn, if_exists='replace')

    # meanh = MongoCollection('meanh')
    # meanh.collection.insert_many(hourly.to_dict('records'))
def main(argv):
    """Plot per-model log-likelihoods for 8x8 and 16x16 patches.

    Two subplots are drawn, one per patch size. In each, a bar with error bar
    (standard error of the mean) is drawn for the Gaussian, the GSM, every
    linear model and every product-of-experts model. Linear-model estimates
    are read from saved importance weights; only data points available for
    all linear models of a patch size are used. The figure is saved to
    ``results/vanhateren/comparison.tex``.

    ``argv`` is not read. Returns 0; this includes the early exit taken when
    a 16x16 linear model has no importance weights at all.
    """
    ### 8x8 PATCHES

    subplot(0, 0)

    # LINEAR MODELS

    # load importance weights for each model
    for model in linear_models:
        model['indices'] = []
        model['loglik'] = []

        for path in glob(model['path'][:-4] + '[0-9]*[0-9].xpck'):
            results = Experiment(path)

            # Only result files whose weight matrix has 1, 200 or 300 rows
            # are used; everything else is reported and skipped.
            if results['ais_weights'].shape[0] not in [1, 200, 300]:
                print path, '(IGNORE)'
                continue

            model['indices'].append(results['indices'])
            # Divide by log(2) and by 64 (8x8 pixels) to get bit/pixel.
            model['loglik'].append(
                logmeanexp(results['ais_weights'], 0).flatten() / log(2.) /
                64.)

        # Fallback when no result file was usable: look for files named
        # 'ais_samples.*' and add the terms those weights do not include.
        if not model['loglik']:
            experiment = Experiment(model['path'])

            # whitening and DC transform
            wt = experiment['model'].model[1].transforms[0]
            dct = experiment['model'].transforms[0]

            # load test data
            data = load('data/vanhateren.{0}.0.npz'.format(
                experiment['parameters'][0]))['data']
            data = preprocess(data, shuffle=False)

            for path in glob(model['path'][:-4] +
                             'ais_samples.[0-9]*[0-9].xpck'):
                results = Experiment(path)

                # incorporate log-likelihood of DC component and jacobian of whitening transform
                loglik_dc = experiment['model'].model[0].loglikelihood(
                    dct(data[:, results['indices']])[:1])
                loglik = logmeanexp(results['ais_weights'],
                                    0) + wt.logjacobian() + loglik_dc

                model['indices'].append(results['indices'])
                model['loglik'].append(loglik.flatten() / 64. / log(2.))

        # make sure each data point is used only once
        model['indices'] = hstack(model['indices']).tolist()
        model['indices'], idx = unique(model['indices'], return_index=True)
        model['loglik'] = hstack(model['loglik'])[idx]

    # find intersection of data points
    indices = [model['indices'] for model in linear_models]
    indices = list(set(indices[0]).intersection(*indices[1:]))

    print 'Using {0} data points for 8x8 patches.'.format(len(indices))

    # use importance weights to estimate log-likelihood
    for idx, model in enumerate(linear_models):
        # Boolean mask selecting this model's points that lie in the
        # intersection.
        subset = [i in indices for i in model['indices']]

        # one estimate of the log-likelihood for each data point
        estimates = model['loglik'][asarray(subset)]

        model['loglik_mean'] = mean(estimates)
        model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size)

        # Linear models occupy bar positions 2 and up.
        bar(idx + 2,
            model['loglik_mean'],
            yerr=model['loglik_sem'],
            color=model['color'],
            fill=model['fill'],
            bar_width=BAR_WIDTH,
            pgf_options=[
                'forget plot', 'nodes near coords',
                'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}'
            ])

    # PRODUCT OF EXPERTS

    for idx, model in enumerate(poe):
        results = loadmat(model['path'])

        # Log-likelihood from the stored energies and log partition value.
        estimates = -results['E'] - results['logZ']
        estimates = estimates.flatten() / 64. / log(2.)
        estimates = estimates[indices]

        model['loglik_mean'] = mean(estimates)
        model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size)

        # Product-of-experts models occupy bar positions 6 and up.
        bar(idx + 6,
            model['loglik_mean'],
            yerr=model['loglik_sem'],
            color=model['color'],
            fill=model['fill'],
            bar_width=BAR_WIDTH,
            pgf_options=[
                'forget plot', 'nodes near coords',
                'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}'
            ])

    # GAUSSIAN SCALE MIXTURE

    results = Experiment(gsm['path'])
    gsm['loglik_mean'] = mean(results['logliks'][:, indices])
    gsm['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(
        len(indices))

    bar(1,
        gsm['loglik_mean'],
        yerr=gsm['loglik_sem'],
        color=gsm['color'],
        fill=gsm['fill'],
        bar_width=BAR_WIDTH,
        pattern=gsm['pattern'],
        pgf_options=[
            'forget plot', 'nodes near coords',
            'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}'
        ])

    # GAUSSIAN

    results = Experiment(gaussian['path'])
    gaussian['loglik_mean'] = mean(results['logliks'][:, indices])
    gaussian['loglik_sem'] = std(results['logliks'][:, indices],
                                 ddof=1) / sqrt(len(indices))

    bar(0,
        gaussian['loglik_mean'],
        yerr=gaussian['loglik_sem'],
        color=gaussian['color'],
        fill=gaussian['fill'],
        bar_width=BAR_WIDTH,
        pgf_options=[
            'nodes near coords',
            'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}'
        ])

    # Tick labels in bar order: Gaussian, GSM, linear models, PoE models.
    xtick(range(len(linear_models) + len(poe) + 2),
          [gaussian['label']] + \
          [gsm['label']] + \
          [model['label'] for model in linear_models] + \
          [model['label'] for model in poe])
    ytick([0.9, 1.1, 1.3, 1.5])
    xlabel(r'\small Overcompleteness')
    ylabel(r'\small Log-likelihood $\pm$ SEM [bit/pixel]')
    axis(width=6,
         height=4,
         ytick_align='outside',
         axis_x_line='bottom',
         axis_y_line='left',
         pgf_options=[
             'xtick style={color=white}',
             r'tick label style={font=\footnotesize}',
             'every outer x axis line/.append style={-}'
         ])
    axis([-0.5, 8.5, 0.85, 1.65])
    title(r'\small 8 $\times$ 8 image patches')

    ### 16x16 PATCHES

    subplot(0, 1)

    # dummy plots
    bar(-1,
        0,
        color=gaussian['color'],
        fill=gaussian['fill'],
        bar_width=BAR_WIDTH)
    bar(-1, 0, color=gsm['color'], fill=gsm['fill'], pattern=gsm['pattern'])

    # LINEAR MODELS

    # load importance weights for each model
    for model in linear_models16:
        model['indices'] = []
        model['loglik'] = []

        # Unlike the 8x8 loop, no result file is filtered out by the number
        # of rows of its weight matrix.
        for path in glob(model['path'][:-4] + '[0-9]*[0-9].xpck'):
            results = Experiment(path)

            model['indices'].append(results['indices'])
            # Divide by 256 (16x16 pixels) and log(2) to get bit/pixel.
            model['loglik'].append(
                logmeanexp(results['ais_weights'], 0).flatten() / 256. /
                log(2.))

        # Same fallback as for the 8x8 models.
        if not model['loglik']:
            experiment = Experiment(model['path'])

            # whitening and DC transform
            wt = experiment['model'].model[1].transforms[0]
            dct = experiment['model'].transforms[0]

            # load test data
            data = load('data/vanhateren.{0}.0.npz'.format(
                experiment['parameters'][0]))['data']
            data = preprocess(data, shuffle=False)

            for path in glob(model['path'][:-4] +
                             'ais_samples.[0-9]*[0-9].xpck'):
                results = Experiment(path)

                # incorporate log-likelihood of DC component and jacobian of whitening transform
                loglik_dc = experiment['model'].model[0].loglikelihood(
                    dct(data[:, results['indices']])[:1])
                loglik = logmeanexp(results['ais_weights'],
                                    0) + wt.logjacobian() + loglik_dc

                model['indices'].append(results['indices'])
                model['loglik'].append(loglik.flatten() / 256. / log(2.))

        # Still nothing found: give up on the whole plot.
        if not model['loglik']:
            print 'NO IMPORTANCE WEIGHTS FOUND FOR', model['path']
            return 0

        # make sure each data point is used only once
        model['indices'] = hstack(model['indices']).tolist()
        model['indices'], idx = unique(model['indices'], return_index=True)
        model['loglik'] = hstack(model['loglik'])[idx]

    # find intersection of data points
    indices = [model['indices'] for model in linear_models16]
    indices = list(set(indices[0]).intersection(*indices[1:]))

    print 'Using {0} data points for 16x16 patches.'.format(len(indices))

    # use importance weights to estimate log-likelihood
    for idx, model in enumerate(linear_models16):
        subset = [i in indices for i in model['indices']]

        # exp(ais_weights) represent unbiased estimates of the likelihood
        # NOTE(review): the 8x8 section indexes model['loglik'] with a single
        # mask, here a leading ':' axis is used although the array is built
        # from flattened pieces in the same way - confirm this two-axis index
        # is valid for the stored data.
        estimates = model['loglik'][:, asarray(subset)]

        model['loglik_mean'] = mean(estimates)
        model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size)

        bar(idx + 2,
            model['loglik_mean'],
            yerr=model['loglik_sem'],
            color=model['color'],
            fill=model['fill'],
            bar_width=BAR_WIDTH,
            pgf_options=[
                'forget plot', 'nodes near coords',
                'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}'
            ])

    # PRODUCT OF EXPERTS

    for idx, model in enumerate(poe16):
        results = loadmat(model['path'])

        estimates = -results['E'] - results['logZ']
        estimates = estimates.flatten() / 256. / log(2.)
        estimates = estimates[indices]

        model['loglik_mean'] = mean(estimates)
        model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size)

        bar(idx + 6,
            model['loglik_mean'],
            yerr=model['loglik_sem'],
            color=model['color'],
            fill=model['fill'],
            bar_width=BAR_WIDTH,
            pgf_options=[
                'forget plot', 'nodes near coords',
                'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}'
            ])

    # GAUSSIAN SCALE MIXTURE

    # NOTE(review): results are loaded from gsm16 but stored in (and the
    # pattern taken from) the 8x8 `gsm` dict, overwriting its values -
    # confirm whether gsm16 was meant throughout.
    results = Experiment(gsm16['path'])
    gsm['loglik_mean'] = mean(results['logliks'][:, indices])
    gsm['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(
        len(indices))

    bar(1,
        gsm['loglik_mean'],
        yerr=gsm['loglik_sem'],
        color=gsm16['color'],
        fill=gsm16['fill'],
        bar_width=BAR_WIDTH,
        pattern=gsm['pattern'],
        pgf_options=[
            'forget plot', 'nodes near coords',
            'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}'
        ])

    # GAUSSIAN

    # NOTE(review): as above, results come from gaussian16 but are written
    # to, and styled from, the 8x8 `gaussian` dict.
    results = Experiment(gaussian16['path'])
    gaussian['loglik_mean'] = mean(results['logliks'][:, indices])
    gaussian['loglik_sem'] = std(results['logliks'][:, indices],
                                 ddof=1) / sqrt(len(indices))

    bar(0,
        gaussian['loglik_mean'],
        yerr=gaussian['loglik_sem'],
        color=gaussian['color'],
        fill=gaussian['fill'],
        bar_width=BAR_WIDTH,
        pgf_options=[
            'forget plot', 'nodes near coords',
            'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}'
        ])

    # Tick labels are hard-coded for this subplot.
    xtick([0, 1, 2, 3, 4, 5, 6, 7, 8],
          ['-', '-', '1x', '2x', '3x', '4x', '2x', '3x', '4x'])
    ytick([0.9, 1.1, 1.3, 1.5])
    xlabel(r'\small Overcompleteness')
    axis(width=6,
         height=4,
         ytick_align='outside',
         axis_x_line='bottom',
         axis_y_line='left',
         pgf_options=[
             'xtick style={color=white}',
             r'tick label style={font=\footnotesize}',
             'every outer x axis line/.append style={-}'
         ])
    axis([-0.5, 8.5, 0.85, 1.65])
    title(r'\small 16 $\times$ 16 image patches')

    gcf().margin = 4
    # NOTE(review): the figure is saved here and again by savefig() below to
    # the same path, after the legend entries were added - confirm the first
    # save is still needed.
    gcf().save('results/vanhateren/comparison.tex')

    # dummy plots
    bar(-1, 0, color=linear_models[0]['color'], fill=linear_models[0]['fill'])
    bar(-1, 0, color=linear_models[1]['color'], fill=linear_models[1]['fill'])
    bar(-1, 0, color=poe[0]['color'], fill=poe[0]['fill'])

    legend('Gaussian', 'GSM', 'LM', 'OLM', 'PoT', location='outer north east')

    savefig('results/vanhateren/comparison.tex')

    draw()

    return 0
#%% time
# Hyper-parameters of the gradient-boosted classifier, gathered in one place.
lgbm_params = {
    'num_leaves': 80,
    'objective': 'binary',
    'max_depth': 30,
    'learning_rate': 0.01,
    'min_child_samples': 20,
    'random_state': 2021,
    'n_estimators': 1000,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
}
clf = LGBMClassifier(**lgbm_params)
clf.fit(X_train, y_train)

# %%
# Report ROC-AUC on both splits; the score is the last column of
# predict_proba.
for split_label, split_X, split_y in (("train:", X_train, y_train),
                                      ("test:", X_test, y_test)):
    pred_y = clf.predict_proba(split_X)[:, -1]
    print(split_label, roc_auc_score(split_y, pred_y))

# %%
import tools

#%%
# Load the raw test set and run it through the project's preprocessing,
# which is also handed a path for the processed frame.
test = tools.preprocess(pd.read_csv(f"{dirPath}rawData/testB.csv"),
                        savePath=f"{dirPath}data/test_v2.pkl")

#%%
# Fill the sample submission with the predicted scores and write it out.
test_sub = pd.read_csv(f"{dirPath}rawData/sample_submit.csv")
pred_y = clf.predict_proba(test)
test_sub['isDefault'] = pred_y[:, -1]
test_sub.to_csv(f"{dirPath}submits/subMay5-12.csv", index=False)

# %%
# %%
time_list = [] for i in range(repeat): image = Image.open(args.content) if args.resize != 0: image = image.resize((args.resize, args.resize)) IMAGE_WIDTH, IMAGE_HEIGHT = image.size style = Image.open(args.style) torch.cuda.synchronize() start_time = time.time() if args.URST: aspect_ratio = IMAGE_WIDTH / IMAGE_HEIGHT thumbnail = image.resize((int(aspect_ratio * args.thumb_size), args.thumb_size)) patches = preprocess(image, padding=PADDING, patch_size=PATCH_SIZE, transform=content_tf, cuda=False) thumbnail = content_tf(thumbnail).unsqueeze(0).to(device) style = style_tf(style).unsqueeze(0).to(device) print("content:", patches.shape) print("thumb:", thumbnail.shape) print("style:", style.shape) with torch.no_grad(): sF = vgg(style) style_transfer_thumbnail(thumbnail, sF, save=False if args.test_speed else True, save_path=os.path.join(args.outf, "thumb-%d.jpg" % args.thumb_size)) style_transfer_high_resolution( patches, sF, padding=PADDING, collection=False, save_path=os.path.join(args.outf, "ours-patch%d-padding%d.jpg" % (PATCH_SIZE, PADDING)), save=False if args.test_speed else True
""" Robert Harrison Lucy Stuehrmann Brady Snowden """ from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score from tools import preprocess, confusion, visualize import matplotlib.pyplot as plt images_train, images_test, images_valid, labels_train, labels_test, labels_valid = preprocess( ) neigh = KNeighborsClassifier(n_neighbors=9, weights='distance') neigh.fit(images_train, labels_train) result = neigh.predict(images_test) matrix = confusion(labels_test, result) print(matrix) print(accuracy_score(labels_test, result)) figure, ax = plt.subplots() plt.ylabel('Predictions') plt.xlabel('Actual') plt.title('Confusion Matrix for KNearestNeighbor') plt.xticks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) plt.yticks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) ax.matshow(matrix, cmap=plt.cm.Spectral) x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] for i in x:
def main(argv):
    """
    Train an ISA-based model on van Hateren image patches.

    Usage (Python 2 script): ``argv[1]`` selects a row of the module-level
    ``parameters`` table; an optional ``argv[2]`` is the path of a previously
    saved experiment whose model is loaded and only fine-tuned.

    Without a parameter id the table of available parameter sets is printed.
    Always returns 0.
    """
    if len(argv) < 2:
        # no parameter set given: print usage and the available parameter sets
        print 'Usage:', argv[0], '<param_id>', '[experiment]'
        print
        print ' {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(
            'ID', 'PS', 'OC', 'TI', 'FI', 'LP', 'SC')

        for id, params in enumerate(parameters):
            print ' {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(id, *params)

        print
        print ' ID = parameter set'
        print ' PS = patch size'
        print ' OC = overcompleteness'
        print ' TI = number of training iterations'
        print ' FI = number of fine-tuning iterations'
        print ' LP = optimize marginal distributions'
        print ' SC = initialize with sparse coding'

        return 0

    # turn floating point problems into exceptions instead of warnings
    seterr(invalid='raise', over='raise', divide='raise')

    # start experiment
    experiment = Experiment()

    # hyperparameters (same order as the columns PS..SC of the usage table)
    patch_size, \
    overcompleteness, \
    max_iter, \
    max_iter_ft, \
    train_prior, \
    sparse_coding = parameters[int(argv[1])]

    ### DATA PREPROCESSING

    # load data, log-transform and center data
    data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
    data = data[:, :100000]
    data = preprocess(data)

    # discrete cosine transform and whitening transform
    # (the whitening is fitted on everything but the first DCT coefficient)
    dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
    wt = WhiteningTransform(dct(data)[1:], symmetric=True)

    ### MODEL DEFINITION

    isa = ISA(num_visibles=data.shape[0] - 1,
              num_hiddens=data.shape[0] * overcompleteness - 1, ssize=1)

    # model DC component with a mixture of Gaussians
    model = StackedModel(dct,
        ConcatModel(MoGaussian(20), StackedModel(wt, isa)))

    ### MODEL TRAINING

    # variables to store in results
    experiment['model'] = model
    experiment['parameters'] = parameters[int(argv[1])]

    def callback(phase, isa, iteration):
        """
        Saves intermediate results every few iterations.

        ``phase`` only ends up in the output file names (0 is passed during
        initial training, 1 during fine-tuning).  ``dct``, ``experiment`` and
        ``argv`` are taken from the enclosing scope, so a ``dct`` rebound
        further below is the one used here.
        """

        if not iteration % 5:
            # whitened filters
            A = dot(dct.A[1:].T, isa.A)

            # side length of a (square) patch, rounded to the nearest integer;
            # local name, does not touch the outer ``patch_size`` string
            patch_size = int(sqrt(A.shape[0]) + 0.5)

            # save intermediate results
            experiment.save('results/vanhateren.{0}/results.{1}.{2}.xpck'.format(argv[1], phase, iteration))

            # visualize basis
            imsave('results/vanhateren.{0}/basis.{1}.{2:0>3}.png'.format(argv[1], phase, iteration),
                stitch(imformat(A.T.reshape(-1, patch_size, patch_size))))

    if len(argv) > 2:
        # initialize model with trained model
        results = Experiment(argv[2])
        model = results['model']

        # pick the ISA layer and the DCT out of the loaded model so that the
        # callback and the fine-tuning below operate on the loaded objects
        isa = model.model[1].model
        dct = model.transforms[0]

        experiment['model'] = model

    else:
        # enable regularization of marginals
        for gsm in isa.subspaces:
            gsm.gamma = 1e-3
            gsm.alpha = 2.
            gsm.beta = 1.

        # train mixture of Gaussians on DC component
        model.train(data, 0, max_iter=100)

        # initialize filters and marginals
        model.initialize(data, 1)
        model.initialize(model=1, method='laplace')

        experiment.progress(10)

        if sparse_coding:
            # initialize with sparse coding
            # (the two branches differ only in 'noise_var' and 'sigma')
            if patch_size == '16x16':
                model.train(data, 1,
                    method=('of', {
                        'max_iter': max_iter,
                        'noise_var': 0.05,
                        'var_goal': 1.,
                        'beta': 10.,
                        'step_width': 0.01,
                        'sigma': 0.3,
                        }),
                    callback=lambda isa, iteration: callback(0, isa, iteration))
            else:
                model.train(data, 1,
                    method=('of', {
                        'max_iter': max_iter,
                        'noise_var': 0.1,
                        'var_goal': 1.,
                        'beta': 10.,
                        'step_width': 0.01,
                        'sigma': 0.5,
                        }),
                    callback=lambda isa, iteration: callback(0, isa, iteration))
            isa.orthogonalize()

        else:
            if patch_size == '16x16':
                # prevents out-of-memory
                mapp.max_processes = 1

            # train model using a subset of the data
            model.train(data[:, :20000], 1,
                max_iter=max_iter,
                train_prior=train_prior,
                persistent=True,
                init_sampling_steps=5,
                method=('sgd', {'momentum': 0.8}),
                callback=lambda isa, iteration: callback(0, isa, iteration),
                sampling_method=('gibbs', {'num_steps': 1}))

        experiment.progress(50)

    if patch_size == '16x16':
        # prevents out-of-memory
        mapp.max_processes = 1

    # disable regularization
    for gsm in isa.subspaces:
        gsm.gamma = 0.

    # fine-tune model using all the data
    # NOTE(review): 10 initial sampling steps are only used for a freshly
    # trained model that was either initialized with sparse coding or has a
    # fixed prior; every other case (including a loaded model) uses 50.
    model.train(data, 1,
        max_iter=max_iter_ft,
        train_prior=train_prior,
        train_subspaces=False,
        persistent=True,
        init_sampling_steps=10 if not len(argv) > 2 and (sparse_coding or not train_prior) else 50,
        method=('lbfgs', {'max_fun': 50}),
        callback=lambda isa, iteration: callback(1, isa, iteration),
        sampling_method=('gibbs', {'num_steps': 2}))

    # the doubled braces survive ``format`` as literal '{0}' and '{1}'
    # placeholders in the path handed to ``experiment.save``
    experiment.save('results/vanhateren/vanhateren.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

    return 0
"""
Robert Harrison
Lucy Stuehrmann
Brady Snowden
"""
from keras.models import Sequential
from keras.layers import Dense, Activation
from tools import preprocess, confusion
import matplotlib.pyplot as plot

# Img Preprocessing
batch_size = 512
epochs = 2000
x_train, x_test, x_val, y_train, y_test, y_val = preprocess()

# Begin Model
# One (units, kernel initializer, activation) triple per dense layer, in
# stacking order; only the first layer declares the flattened input shape.
layer_specs = [
    (20, 'he_normal', 'relu'),
    (15, 'he_normal', 'selu'),
    (12, 'glorot_uniform', 'tanh'),
    (10, 'he_normal', 'softmax'),
]

model = Sequential()
for position, (units, initializer, activation) in enumerate(layer_specs):
    dense_kwargs = {'kernel_initializer': initializer}
    if position == 0:
        dense_kwargs['input_shape'] = (28 * 28,)
    model.add(Dense(units, **dense_kwargs))
    model.add(Activation(activation))

# Compile Model
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
def main(argv): seterr(over='raise', divide='raise', invalid='raise') try: if int(os.environ['OMP_NUM_THREADS']) > 1 or int( os.environ['MKL_NUM_THREADS']) > 1: print 'It seems that parallelization is turned on. This will skew the results. To turn it off:' print '\texport OMP_NUM_THREADS=1' print '\texport MKL_NUM_THREADS=1' except: print 'Parallelization of BLAS might be turned on. This could skew results.' experiment = Experiment(seed=42) if not os.path.exists(EXPERIMENT_PATH): print 'Could not find file \'{0}\'.'.format(EXPERIMENT_PATH) return 0 results = Experiment(EXPERIMENT_PATH) ica = results['model'].model[1].model # load test data data = load('data/vanhateren.{0}.0.npz'.format( results['parameters'][0]))['data'] data = data[:, :100000] data = preprocess(data) data = data[:, permutation(data.shape[1] / 2)[:NUM_SAMPLES]] # transform data dct = results['model'].transforms[0] wt = results['model'].model[1].transforms[0] data = wt(dct(data)[1:]) X = data for method in sampling_methods: # disable output and parallelization Distribution.VERBOSITY = 0 mapp.max_processes = 1 # measure time required by transition operator start = time() # initial hidden states Y = dot(pinv(ica.A), X) # increase number of steps to reduce overhead ica.sample_posterior( X, method=(method['method'], dict(method['parameters'], Y=Y, num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER))) # time required per transition operator application duration = (time() - start) / NUM_STEPS_MULTIPLIER # enable output and parallelization Distribution.VERBOSITY = 2 mapp.max_processes = 2 energies = [mean(ica.prior_energy(Y))] # Markov chain for i in range(int(NUM_SECONDS / duration + 1.)): Y = ica.sample_posterior(X, method=(method['method'], dict(method['parameters'], Y=Y))) energies.append(mean(ica.prior_energy(Y))) plot(arange(len(energies)) * duration, energies, '-', color=method['color'], line_width=1.2, pgf_options=['forget plot'], comment=str(method['parameters'])) xlabel('time in 
seconds') ylabel('average energy') title('van Hateren') gca().width = 7 gca().height = 7 gca().xmin = -1 gca().xmax = NUM_SECONDS savefig('results/vanhateren/vanhateren_trace_.tex') return 0
def load_examples(a):
    """
    Build the TensorFlow input pipeline for the image pairs in ``a.input_dir``.

    JPEG files are preferred; PNG files are used when no JPEG is found.  Each
    image is either split into a left (A) and right (B) half or, with
    ``a.lab_colorization``, into the channels returned by
    ``tools.preprocess_lab``.  Both sides then receive the same flip, resize
    and crop before being batched.

    Reads ``input_dir``, ``mode``, ``lab_colorization``, ``which_direction``,
    ``flip``, ``scale_size`` and ``batch_size`` from ``a``.

    Returns an ``Examples`` with batched paths, inputs and targets, the number
    of files and the steps per epoch.

    Raises ``Exception`` for a missing or empty input directory, an unknown
    direction, or a scale size below the crop size.
    """
    CROP_SIZE = 256

    if a.input_dir is None or not os.path.exists(a.input_dir):
        raise Exception("input_dir does not exist")

    # try the supported formats in order and keep the first one with files
    candidates = (
        ("*.jpg", tf.image.decode_jpeg),
        ("*.png", tf.image.decode_png),
    )
    for pattern, decode in candidates:
        input_paths = glob.glob(os.path.join(a.input_dir, pattern))
        if len(input_paths) != 0:
            break

    if len(input_paths) == 0:
        raise Exception("input_dir contains no image files")

    def get_name(path):
        # file name without directory and extension
        return os.path.splitext(os.path.basename(path))[0]

    # if the image names are numbers, sort by the value rather than asciibetically
    # having sorted inputs means that the outputs are sorted in test mode
    numeric_names = all(get_name(path).isdigit() for path in input_paths)
    sort_key = (lambda path: int(get_name(path))) if numeric_names else None
    input_paths = sorted(input_paths, key=sort_key)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=a.mode == "train")
        paths, contents = tf.WholeFileReader().read(path_queue)
        image = tf.image.convert_image_dtype(decode(contents), dtype=tf.float32)

        # refuse anything that is not a 3-channel image at run time
        channel_check = tf.assert_equal(tf.shape(image)[2], 3,
                                        message="image does not have 3 channels")
        with tf.control_dependencies([channel_check]):
            image = tf.identity(image)

        image.set_shape([None, None, 3])

        #%% skipped
        if a.lab_colorization:
            # load color and brightness from image, no B image exists here
            L_chan, a_chan, b_chan = tools.preprocess_lab(tools.rgb_to_lab(image))
            a_images = tf.expand_dims(L_chan, axis=2)
            b_images = tf.stack([a_chan, b_chan], axis=2)
        else:
            #%%
            # break apart image pair and move to range [-1, 1]
            half = tf.shape(image)[1]  # [height, width, channels]
            a_images = tools.preprocess(image[:, :half // 2, :])  #left
            b_images = tools.preprocess(image[:, half // 2:, :])  #right

    if a.which_direction not in ("AtoB", "BtoA"):
        raise Exception("invalid direction")
    if a.which_direction == "AtoB":
        inputs, targets = a_images, b_images
    else:
        inputs, targets = b_images, a_images

    # synchronize seed for image operations so that we do the same operations to both
    # input and output images
    seed = random.randint(0, 2**31 - 1)

    def transform(image):
        out = image
        if a.flip:
            out = tf.image.random_flip_left_right(out, seed=seed)

        # area produces a nice downscaling, but does nearest neighbor for upscaling
        # assume we're going to be doing downscaling here
        out = tf.image.resize_images(out, [a.scale_size, a.scale_size],
                                     method=tf.image.ResizeMethod.AREA)

        # random top-left corner of the crop window
        corner = tf.random_uniform([2], 0, a.scale_size - CROP_SIZE + 1, seed=seed)
        offset = tf.cast(tf.floor(corner), dtype=tf.int32)

        if a.scale_size < CROP_SIZE:
            raise Exception("scale size cannot be less than crop size")
        if a.scale_size > CROP_SIZE:
            out = tf.image.crop_to_bounding_box(out, offset[0], offset[1],
                                                CROP_SIZE, CROP_SIZE)
        return out

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)

    num_files = len(input_paths)
    steps_per_epoch = int(math.ceil(num_files / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        count=num_files,
        steps_per_epoch=steps_per_epoch,
    )
# -*- coding: utf-8 -*-
import tools

apikey = ''  #Add your API key
server = 'www.sefaria.org'

# [book name, file key] pairs; the file key is what tools.preprocess and
# tools.parseText are given to locate the book's source
books = [
    ['Genesis', 'f_01588'],
    ['Exodus', 'f_01589'],
    ['Leviticus', 'f_01590'],
    ['Numbers', 'f_01591'],
    ['Deuteronomy', 'f_01592'],
]

for book in books:
    book_name, filekey = book
    ref = "Targum Jonathan on " + book_name
    alt_title = "Targum Yonatan on " + book_name

    # create the record first, then preprocess, parse and post the text
    tools.createBookRecord(server, apikey, ref, alt_title, "Targum")
    tools.preprocess(filekey)
    text_whole = tools.parseText(filekey, ref)
    tools.postText(server, apikey, ref, text_whole)
PATCH_SIZE = args.patch_size PADDING = args.padding image = Image.open(args.content) IMAGE_WIDTH, IMAGE_HEIGHT = image.size torch.cuda.synchronize() start_time = time.time() if args.URST: aspect_ratio = IMAGE_WIDTH / IMAGE_HEIGHT thumbnail = image.resize( (int(aspect_ratio * args.thumb_size), args.thumb_size)) thumbnail = transform(thumbnail).unsqueeze(0).to(device) patches = preprocess(image, padding=PADDING, transform=transform, patch_size=PATCH_SIZE, cuda=False) print("patch:", patches.shape) print("thumbnail:", thumbnail.shape) with torch.no_grad(): style_transfer_thumbnail( thumbnail, save_path=os.path.join(args.outf, "thumb-%d.jpg" % args.thumb_size), save=False if args.test_speed else True) style_transfer_high_resolution( patches, padding=PADDING,
# -*- coding: utf-8 -*-
import tools

apikey = ''  #Add your API key
server = 'www.sefaria.org'

# [book name, file key] pairs; the file key is what tools.preprocess and
# tools.parseText are given to locate the book's source
books = [
    ['Genesis', 'f_01588'],
    ['Exodus', 'f_01589'],
    ['Leviticus', 'f_01590'],
    ['Numbers', 'f_01591'],
    ['Deuteronomy', 'f_01592'],
]

for book in books:
    book_name, filekey = book
    ref = "Targum Jonathan on " + book_name
    alt_title = "Targum Yonatan on " + book_name

    # create the record first, then preprocess, parse and post the text
    tools.createBookRecord(server, apikey, ref, alt_title, "Targum")
    tools.preprocess(filekey)
    text_whole = tools.parseText(filekey, ref)
    tools.postText(server, apikey, ref, text_whole)
def main(argv): seterr(over='raise', divide='raise', invalid='raise') experiment = Experiment(seed=42) try: if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1: print 'It seems that parallelization is turned on. This will skew the results. To turn it off:' print '\texport OMP_NUM_THREADS=1' print '\texport MKL_NUM_THREADS=1' except: print 'Parallelization of BLAS might be turned on. This could skew results.' if not os.path.exists(EXPERIMENT_PATH): print 'Could not find file \'{0}\'.'.format(EXPERIMENT_PATH) return 0 results = Experiment(EXPERIMENT_PATH) ica = results['model'].model[1].model # load test data data = load('data/vanhateren.{0}.0.npz'.format(results['parameters'][0]))['data'] data = data[:, :100000] data = preprocess(data) data = data[:, permutation(data.shape[1] / 2)[:NUM_SAMPLES]] # transform data dct = results['model'].transforms[0] wt = results['model'].model[1].transforms[0] data = wt(dct(data)[1:]) # burn-in using Gibbs sampler X_ = data[:, :NUM_AUTOCORR] Y_ = ica.sample_posterior(X_, method=('gibbs', {'num_steps': NUM_BURN_IN_STEPS})) for method in sampling_methods: # disable output and parallelization for measuring time Distribution.VERBOSITY = 0 mapp.max_processes = 1 Y = ica.sample_prior(NUM_SAMPLES) X = dot(ica.A, Y) # measure time required by transition operator start = time() # increase number of steps to reduce overhead ica.sample_posterior(X, method=(method['method'], dict(method['parameters'], Y=Y, num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER))) # time required per transition operator application duration = (time() - start) / NUM_STEPS_MULTIPLIER # number of mcmc steps to run for this method num_mcmc_steps = int(NUM_SECONDS_RUN / duration + 1.) num_autocorr_steps = int(NUM_SECONDS_VIS / duration + 1.) 
# enable output and parallelization Distribution.VERBOSITY = 2 mapp.max_processes = 12 # posterior samples Y = [Y_] # Markov chain for i in range(num_mcmc_steps): Y.append(ica.sample_posterior(X_, method=(method['method'], dict(method['parameters'], Y=Y[-1])))) ac = [] for j in range(NUM_AUTOCORR): # collect samples belonging to one posterior distribution S = hstack([Y[k][:, [j]] for k in range(num_mcmc_steps)]) # compute autocorrelation for j-th posterior ac = [autocorr(S, num_autocorr_steps)] # average and plot autocorrelation functions plot(arange(num_autocorr_steps) * duration, mean(ac, 0), '-', color=method['color'], line_width=1.2, comment=str(method['parameters'])) xlabel('time in seconds') ylabel('autocorrelation') title('van Hateren') gca().width = 7 gca().height = 7 gca().xmin = -1 gca().xmax = NUM_SECONDS_VIS savefig('results/vanhateren/vanhateren_autocorr2.tex') return 0
def main(argv): ### 8x8 PATCHES subplot(0, 0) # LINEAR MODELS # load importance weights for each model for model in linear_models: model['indices'] = [] model['loglik'] = [] for path in glob(model['path'][:-4] + '[0-9]*[0-9].xpck'): results = Experiment(path) if results['ais_weights'].shape[0] not in [1, 200, 300]: print path, '(IGNORE)' continue model['indices'].append(results['indices']) model['loglik'].append(logmeanexp(results['ais_weights'], 0).flatten() / log(2.) / 64.) if not model['loglik']: experiment = Experiment(model['path']) # whitening and DC transform wt = experiment['model'].model[1].transforms[0] dct = experiment['model'].transforms[0] # load test data data = load('data/vanhateren.{0}.0.npz'.format(experiment['parameters'][0]))['data'] data = preprocess(data, shuffle=False) for path in glob(model['path'][:-4] + 'ais_samples.[0-9]*[0-9].xpck'): results = Experiment(path) # incorporate log-likelihood of DC component and jacobian of whitening transform loglik_dc = experiment['model'].model[0].loglikelihood(dct(data[:, results['indices']])[:1]) loglik = logmeanexp(results['ais_weights'], 0) + wt.logjacobian() + loglik_dc model['indices'].append(results['indices']) model['loglik'].append(loglik.flatten() / 64. 
/ log(2.)) # make sure each data point is used only once model['indices'] = hstack(model['indices']).tolist() model['indices'], idx = unique(model['indices'], return_index=True) model['loglik'] = hstack(model['loglik'])[idx] # find intersection of data points indices = [model['indices'] for model in linear_models] indices = list(set(indices[0]).intersection(*indices[1:])) print 'Using {0} data points for 8x8 patches.'.format(len(indices)) # use importance weights to estimate log-likelihood for idx, model in enumerate(linear_models): subset = [i in indices for i in model['indices']] # one estimate of the log-likelihood for each data point estimates = model['loglik'][asarray(subset)] model['loglik_mean'] = mean(estimates) model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size) bar(idx + 2, model['loglik_mean'], yerr=model['loglik_sem'], color=model['color'], fill=model['fill'], bar_width=BAR_WIDTH, pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}']) # PRODUCT OF EXPERTS for idx, model in enumerate(poe): results = loadmat(model['path']) estimates = -results['E'] - results['logZ'] estimates = estimates.flatten() / 64. / log(2.) 
estimates = estimates[indices] model['loglik_mean'] = mean(estimates) model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size) bar(idx + 6, model['loglik_mean'], yerr=model['loglik_sem'], color=model['color'], fill=model['fill'], bar_width=BAR_WIDTH, pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}']) # GAUSSIAN SCALE MIXTURE results = Experiment(gsm['path']) gsm['loglik_mean'] = mean(results['logliks'][:, indices]) gsm['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(len(indices)) bar(1, gsm['loglik_mean'], yerr=gsm['loglik_sem'], color=gsm['color'], fill=gsm['fill'], bar_width=BAR_WIDTH, pattern=gsm['pattern'], pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}']) # GAUSSIAN results = Experiment(gaussian['path']) gaussian['loglik_mean'] = mean(results['logliks'][:, indices]) gaussian['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(len(indices)) bar(0, gaussian['loglik_mean'], yerr=gaussian['loglik_sem'], color=gaussian['color'], fill=gaussian['fill'], bar_width=BAR_WIDTH, pgf_options=['nodes near coords', 'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}']) xtick(range(len(linear_models) + len(poe) + 2), [gaussian['label']] + \ [gsm['label']] + \ [model['label'] for model in linear_models] + \ [model['label'] for model in poe]) ytick([0.9, 1.1, 1.3, 1.5]) xlabel(r'\small Overcompleteness') ylabel(r'\small Log-likelihood $\pm$ SEM [bit/pixel]') axis( width=6, height=4, ytick_align='outside', axis_x_line='bottom', axis_y_line='left', pgf_options=[ 'xtick style={color=white}', r'tick label style={font=\footnotesize}', 'every outer x axis line/.append style={-}']) axis([-0.5, 8.5, 0.85, 1.65]) title(r'\small 8 $\times$ 8 image patches') ### 16x16 PATCHES subplot(0, 1) # dummy plots bar(-1, 0, color=gaussian['color'], fill=gaussian['fill'], bar_width=BAR_WIDTH) bar(-1, 0, 
color=gsm['color'], fill=gsm['fill'], pattern=gsm['pattern']) # LINEAR MODELS # load importance weights for each model for model in linear_models16: model['indices'] = [] model['loglik'] = [] for path in glob(model['path'][:-4] + '[0-9]*[0-9].xpck'): results = Experiment(path) model['indices'].append(results['indices']) model['loglik'].append(logmeanexp(results['ais_weights'], 0).flatten() / 256. / log(2.)) if not model['loglik']: experiment = Experiment(model['path']) # whitening and DC transform wt = experiment['model'].model[1].transforms[0] dct = experiment['model'].transforms[0] # load test data data = load('data/vanhateren.{0}.0.npz'.format(experiment['parameters'][0]))['data'] data = preprocess(data, shuffle=False) for path in glob(model['path'][:-4] + 'ais_samples.[0-9]*[0-9].xpck'): results = Experiment(path) # incorporate log-likelihood of DC component and jacobian of whitening transform loglik_dc = experiment['model'].model[0].loglikelihood(dct(data[:, results['indices']])[:1]) loglik = logmeanexp(results['ais_weights'], 0) + wt.logjacobian() + loglik_dc model['indices'].append(results['indices']) model['loglik'].append(loglik.flatten() / 256. 
/ log(2.)) if not model['loglik']: print 'NO IMPORTANCE WEIGHTS FOUND FOR', model['path'] return 0 # make sure each data point is used only once model['indices'] = hstack(model['indices']).tolist() model['indices'], idx = unique(model['indices'], return_index=True) model['loglik'] = hstack(model['loglik'])[idx] # find intersection of data points indices = [model['indices'] for model in linear_models16] indices = list(set(indices[0]).intersection(*indices[1:])) print 'Using {0} data points for 16x16 patches.'.format(len(indices)) # use importance weights to estimate log-likelihood for idx, model in enumerate(linear_models16): subset = [i in indices for i in model['indices']] # exp(ais_weights) represent unbiased estimates of the likelihood estimates = model['loglik'][:, asarray(subset)] model['loglik_mean'] = mean(estimates) model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size) bar(idx + 2, model['loglik_mean'], yerr=model['loglik_sem'], color=model['color'], fill=model['fill'], bar_width=BAR_WIDTH, pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}']) # PRODUCT OF EXPERTS for idx, model in enumerate(poe16): results = loadmat(model['path']) estimates = -results['E'] - results['logZ'] estimates = estimates.flatten() / 256. / log(2.) 
estimates = estimates[indices] model['loglik_mean'] = mean(estimates) model['loglik_sem'] = std(estimates, ddof=1) / sqrt(estimates.size) bar(idx + 6, model['loglik_mean'], yerr=model['loglik_sem'], color=model['color'], fill=model['fill'], bar_width=BAR_WIDTH, pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm,font=\\footnotesize}']) # GAUSSIAN SCALE MIXTURE results = Experiment(gsm16['path']) gsm['loglik_mean'] = mean(results['logliks'][:, indices]) gsm['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(len(indices)) bar(1, gsm['loglik_mean'], yerr=gsm['loglik_sem'], color=gsm16['color'], fill=gsm16['fill'], bar_width=BAR_WIDTH, pattern=gsm['pattern'], pgf_options=[ 'forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}']) # GAUSSIAN results = Experiment(gaussian16['path']) gaussian['loglik_mean'] = mean(results['logliks'][:, indices]) gaussian['loglik_sem'] = std(results['logliks'][:, indices], ddof=1) / sqrt(len(indices)) bar(0, gaussian['loglik_mean'], yerr=gaussian['loglik_sem'], color=gaussian['color'], fill=gaussian['fill'], bar_width=BAR_WIDTH, pgf_options=['forget plot', 'nodes near coords', 'every node near coord/.style={yshift=0.05cm, font=\\footnotesize}']) xtick([0, 1, 2, 3, 4, 5, 6, 7, 8], ['-', '-', '1x', '2x', '3x', '4x', '2x', '3x', '4x']) ytick([0.9, 1.1, 1.3, 1.5]) xlabel(r'\small Overcompleteness') axis( width=6, height=4, ytick_align='outside', axis_x_line='bottom', axis_y_line='left', pgf_options=[ 'xtick style={color=white}', r'tick label style={font=\footnotesize}', 'every outer x axis line/.append style={-}']) axis([-0.5, 8.5, 0.85, 1.65]) title(r'\small 16 $\times$ 16 image patches') gcf().margin = 4 gcf().save('results/vanhateren/comparison.tex') # dummy plots bar(-1, 0, color=linear_models[0]['color'], fill=linear_models[0]['fill']) bar(-1, 0, color=linear_models[1]['color'], fill=linear_models[1]['fill']) bar(-1, 0, 
color=poe[0]['color'], fill=poe[0]['fill']) legend('Gaussian', 'GSM', 'LM', 'OLM', 'PoT', location='outer north east') savefig('results/vanhateren/comparison.tex') draw() return 0
rets = client.gather(client.map(build_dataset_, datasets)) else: rets = [] for ds in datasets: rets.append(build_dataset_(ds, progress_bar=True)) tick1 = time.time() print('Loading took {0} s.'.format(tick1 - tick)) if step2: paths = glob.glob(INTERMEDIATE_PATH + '/*.pickle') #paths = [p for p in paths if "1HB(d)" in p] # Convert data into a Pandas dataframe # containing all trigger decisions for each L2 df = preprocess(paths, only_overlap_events=True) print(df) print(df.columns) tick2 = time.time() # Train the DNN pars = { 'features': features, 'filter': df.has_matched_gen, 'label': LABEL, 'output_path': OUTPUT_PATH, 'epochs': 100, #100 'b_or_e': B_OR_E, 'save_model': save_model, 'save_loss_plot': save_loss_plot, 'save_metadata': save_metadata,
def load_examples(a):
    """
    Build the TensorFlow input pipeline for the image pairs in ``a.input_dir``.

    JPEG files are preferred; PNG files are used when no JPEG is found.  Each
    single-channel 32x64 image is split into a left (A) and right (B) half,
    both halves are passed through ``tools.preprocess``, resized to
    ``a.scale_size`` and batched.

    Reads ``input_dir``, ``mode``, ``which_direction``, ``scale_size`` and
    ``batch_size`` from ``a``.

    Returns an ``Examples`` with batched paths, inputs and targets, the number
    of files and the steps per epoch.

    Raises ``Exception`` for a missing or empty input directory or an unknown
    direction.
    """
    if a.input_dir is None or not os.path.exists(a.input_dir):
        raise Exception("input_dir does not exist")

    # try the supported formats in order and keep the first one with files
    candidates = (
        ("*.jpg", tf.image.decode_jpeg),
        ("*.png", tf.image.decode_png),
    )
    for pattern, decode in candidates:
        input_paths = glob.glob(os.path.join(a.input_dir, pattern))
        if len(input_paths) != 0:
            break

    if len(input_paths) == 0:
        raise Exception("input_dir contains no image files")

    def get_name(path):
        # file name without directory and extension
        return os.path.splitext(os.path.basename(path))[0]

    # if the image names are numbers, sort by the value rather than asciibetically
    # having sorted inputs means that the outputs are sorted in test mode
    numeric_names = all(get_name(path).isdigit() for path in input_paths)
    sort_key = (lambda path: int(get_name(path))) if numeric_names else None
    input_paths = sorted(input_paths, key=sort_key)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=a.mode == "train")
        paths, contents = tf.WholeFileReader().read(path_queue)
        image = tf.image.convert_image_dtype(decode(contents), dtype=tf.float32)

        # refuse anything that is not a 1-channel image at run time
        channel_check = tf.assert_equal(tf.shape(image)[2], 1,
                                        message="image does not have 1 channels")
        with tf.control_dependencies([channel_check]):
            image = tf.identity(image)

        image.set_shape([32, 64, 1])

        #%%
        # break apart image pair and move to range [-1, 1]
        half = tf.shape(image)[1]  # [height, width, channels]
        a_images = tools.preprocess(image[:, :half // 2, :])  #left #[-1,1]
        b_images = tools.preprocess(image[:, half // 2:, :])  #right

    if a.which_direction not in ("AtoB", "BtoA"):
        raise Exception("invalid direction")
    if a.which_direction == "AtoB":
        inputs, targets = a_images, b_images
    else:
        inputs, targets = b_images, a_images

    def transform(image):
        # area interpolation to a square of side a.scale_size
        return tf.image.resize_images(image, [a.scale_size, a.scale_size],
                                      method=tf.image.ResizeMethod.AREA)

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)

    num_files = len(input_paths)
    steps_per_epoch = int(math.ceil(num_files / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        count=num_files,
        steps_per_epoch=steps_per_epoch,
    )
import sys from user_based import * from tools import file2dic, file2dic_user, preprocess if __name__ == "__main__": if len(sys.argv) < 3: print "Usage: python " + sys.argv[0] + " data_file target_offering_file" exit(1) #ten_offering_Korea = [53, 159, 5, 92, 156, 81, 117, 135, 134, 132] [data_dic, user_list, item_list] = preprocess(sys.argv[1]) [train_mat, test_mat] = purchase_matrix(data_dic, user_list, item_list) target_offering = [] data = open( sys.argv[2], 'r' ) for line in data: line = line.replace('\n', '') if item_list.count( line ) > 0: target_offering.append( item_list.index(line) + 1 ) print "Users: " + str( len(user_list) ) print "Items: " + str( len(item_list) ) sim_mat = user_similarity(train_mat, 0) related_user = related_users( sim_mat, 10 ) user_item_mat = predict_user_based(train_mat, related_user)
def main(argv): if len(argv) < 2: print 'Usage:', argv[0], '<param_id>' print print ' {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format( 'ID', 'PS', 'NS', 'TI', 'DC') for id, params in enumerate(parameters): print ' {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format(id, *params) print print ' ID = parameter set' print ' PS = patch size' print ' NS = number of scales' print ' TI = number of training iterations' print ' DC = model DC component separately' return 0 # start experiment experiment = Experiment(server='10.38.138.150') # hyperparameters patch_size, num_scales, max_iter, separate_dc = parameters[int(argv[1])] ### DATA PREPROCESSING # load data, log-transform and center data data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data'] data = data[:, :100000] data = preprocess(data) ### MODEL DEFINITION AND TRAINING if separate_dc: # discrete cosine transform and symmetric whitening transform dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT') wt = WhiteningTransform(dct(data)[1:], symmetric=True) model = StackedModel(dct, ConcatModel( MoGaussian(20), StackedModel(wt, GSM(data.shape[0] - 1, num_scales)))) else: # symmetric whitening transform wt = WhiteningTransform(data, symmetric=True) model = StackedModel(wt, GSM(data.shape[0], num_scales)) ### MODEL TRAINING AND EVALUATION model.train(data, max_iter=max_iter, tol=1e-7) # load and preprocess test data data = load('data/vanhateren.{0}.0.npz'.format(patch_size))['data'] data = preprocess(data, shuffle=False) # log-likelihod in [bit/pixel] logliks = model.loglikelihood(data) / log(2.) / data.shape[0] loglik = mean(logliks) sem = std(logliks, ddof=1) / sqrt(logliks.shape[1]) print 'log-likelihood: {0:.4f} +- {1:.4f} [bit/pixel]'.format(loglik, sem) experiment['logliks'] = logliks experiment['loglik'] = loglik experiment['sem'] = sem experiment.save('results/vanhateren/gsm.{0}.{{0}}.{{1}}.xpck'.format(argv[1])) return 0
def main(argv):
    """
    Plot the average prior energy over time for each sampling method.

    Loads the stored experiment at C{EXPERIMENT_PATH}, transforms a random
    subset of the test data with the stored transforms and, for every
    entry of C{sampling_methods}, first measures the time needed per
    application of the transition operator and then runs a Markov chain
    for roughly C{NUM_SECONDS} seconds while recording the mean prior
    energy of the hidden states. The traces are saved as a figure.

    @type  argv: list
    @param argv: command-line arguments (not used by this function)

    @rtype: int
    @return: always 0, also when the experiment file is missing
    """

    # turn floating point problems into exceptions
    seterr(over='raise', divide='raise', invalid='raise')

    try:
        if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
            print('It seems that parallelization is turned on. This will skew the results. To turn it off:')
            print('\texport OMP_NUM_THREADS=1')
            print('\texport MKL_NUM_THREADS=1')
    except (KeyError, ValueError):
        # KeyError: variable not set; ValueError: value is not an integer.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        print('Parallelization of BLAS might be turned on. This could skew results.')

    # NOTE(review): the object is not used below; presumably created for
    # its seeding side effect -- confirm before removing
    experiment = Experiment(seed=42)

    if not os.path.exists(EXPERIMENT_PATH):
        print('Could not find file \'{0}\'.'.format(EXPERIMENT_PATH))
        return 0

    results = Experiment(EXPERIMENT_PATH)
    ica = results['model'].model[1].model

    # load test data
    data = load('data/vanhateren.{0}.0.npz'.format(results['parameters'][0]))['data']
    data = data[:, :100000]
    data = preprocess(data)
    # pick NUM_SAMPLES random columns from the first half of the data;
    # '//' keeps the argument an integer regardless of the Python version
    data = data[:, permutation(data.shape[1] // 2)[:NUM_SAMPLES]]

    # transform data
    dct = results['model'].transforms[0]
    wt = results['model'].model[1].transforms[0]
    data = wt(dct(data)[1:])

    X = data

    for method in sampling_methods:
        # disable output and parallelization
        Distribution.VERBOSITY = 0
        mapp.max_processes = 1

        # measure time required by transition operator
        start = time()

        # initial hidden states
        Y = dot(pinv(ica.A), X)

        # increase number of steps to reduce overhead
        ica.sample_posterior(X, method=(method['method'],
            dict(method['parameters'], Y=Y,
                num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

        # time required per transition operator application
        duration = (time() - start) / NUM_STEPS_MULTIPLIER

        # enable output and parallelization
        Distribution.VERBOSITY = 2
        mapp.max_processes = 2

        energies = [mean(ica.prior_energy(Y))]

        # Markov chain, run for as many steps as fit into NUM_SECONDS
        for i in range(int(NUM_SECONDS / duration + 1.)):
            Y = ica.sample_posterior(X,
                method=(method['method'], dict(method['parameters'], Y=Y)))
            energies.append(mean(ica.prior_energy(Y)))

        # one trace per method; the time axis is the step index scaled by
        # the measured duration of a single step
        plot(arange(len(energies)) * duration, energies, '-',
            color=method['color'],
            line_width=1.2,
            pgf_options=['forget plot'],
            comment=str(method['parameters']))

    xlabel('time in seconds')
    ylabel('average energy')
    title('van Hateren')

    gca().width = 7
    gca().height = 7
    gca().xmin = -1
    gca().xmax = NUM_SECONDS

    savefig('results/vanhateren/vanhateren_trace_.tex')

    return 0