def _build_data_generators(self):
    # TODO: we should use it under preproc
    template_filepath = None
    # if template is undefined, target is the same volume but not moved (motion correction)
    if self._use_template:
        template_filepath = os.path.join(self._data_dir, "template_on_grid")
    # if unsupervised, network output is not a transformation but the target itself
    params_gen = dict(list_files=self._list_files,
                      template_file=template_filepath,
                      is_unsupervised=self._unsupervised,
                      batch_size=self._batch_size,
                      avail_cores=self._ncpu)
    self.train_gen = DataGenerator(partition="train", **params_gen)
    self.valid_gen = DataGenerator(partition="valid", **params_gen)
    self.test_gen = DataGenerator(partition="test", **params_gen)
def test_get_random_data_range(self):
    data_generator = DataGenerator(0, 99)
    data = data_generator.get_random_data(days=31, anomaly_count=0)
    self.assertEqual(list, type(data))
    self.assertEqual(31 * 24, len(data))
    values = [x[1] for x in data]
    self.assertTrue(all(value in range(0, 100) for value in values),
                    "Encountered unexpected anomaly")
def main(_):
    skip_layers = ['fc8']  # no pre-trained weights
    train_layers = ['fc8']
    finetune_layers = [
        'fc7', 'fc6', 'conv5', 'conv4', 'conv3', 'conv2', 'conv1'
    ]

    writer_dir = "logs/{}".format(FLAGS.dataset)
    checkpoint_dir = "checkpoints/{}".format(FLAGS.dataset)
    if tf.gfile.Exists(writer_dir):
        tf.gfile.DeleteRecursively(writer_dir)
    tf.gfile.MakeDirs(writer_dir)
    if tf.gfile.Exists(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    generator = DataGenerator(data_dir=FLAGS.data_dir,
                              dataset=FLAGS.dataset,
                              batch_size=FLAGS.batch_size,
                              num_threads=FLAGS.num_threads)
    model = VS_CNN(num_classes=2, skip_layers=skip_layers)
    loss = loss_fn(model)
    warm_up, train_op, learning_rate = train_fn(loss, generator,
                                                finetune_layers, train_layers)
    saver = tf.train.Saver(max_to_keep=FLAGS.num_checkpoints)

    config = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        logger = Logger(writer_dir, sess.graph)
        result_file = open('result_{}_base.txt'.format(FLAGS.dataset), 'w')

        sess.run(tf.global_variables_initializer())
        model.load_initial_weights(sess)

        print("{} Start training...".format(datetime.now()))
        print("{} Open Tensorboard at --logdir {}".format(
            datetime.now(), writer_dir))

        for epoch in range(FLAGS.num_epochs):
            print("\n{} Epoch: {}/{}".format(datetime.now(), epoch + 1,
                                             FLAGS.num_epochs))
            result_file.write("\n{} Epoch: {}/{}\n".format(
                datetime.now(), epoch + 1, FLAGS.num_epochs))

            if epoch < 20:
                update_op = warm_up
            else:
                update_op = train_op

            train(sess, model, generator, update_op, learning_rate, loss,
                  epoch, logger)
            test(sess, model, generator, result_file)
            # save_model(sess, saver, epoch, checkpoint_dir)

        result_file.close()
def get_data_generator(self):
    images = []
    for dirname, _, filenames in os.walk(self.data_path):
        images += [os.path.join(dirname, f) for f in filenames]
    self.nbatch = int(np.ceil(len(images) / self.batch_size))
    return DataGenerator(images,
                         image_size=self.image_size,
                         batch_size=self.batch_size)
def get_data_generator(data_path, batch_size, image_size):
    images = []
    for dirname, _, filenames in os.walk(data_path):
        images += [os.path.join(dirname, f) for f in filenames]
    train_images, test_images = train_test_split(images)
    test_images, eval_images = train_test_split(test_images)
    nbatch = int(np.ceil(len(images) / batch_size))
    return (DataGenerator(train_images, image_size=image_size, batch_size=batch_size),
            DataGenerator(test_images, image_size=image_size, batch_size=batch_size),
            DataGenerator(eval_images, image_size=image_size, batch_size=batch_size))
def __init__(self, args, backbone):
    self.args = args
    self.backbone = backbone
    self._model = None
    self.train_gen = DataGenerator(args, siamese=True)
    self.n_labels = self.train_gen.n_labels
    self.build_model()
    self.load_eval_dataset()
    self.accuracy = 0
def __init__(self, args):
    """Copy user-defined configs.
    Build backbone and fcn network models.
    """
    self.args = args
    self.fcn = None
    self.train_generator = DataGenerator(args)
    self.build_model()
    self.eval_init()
def test_get_random_data_outliers(self):
    data_generator = DataGenerator(0, 99)
    data = data_generator.get_random_data(days=31, anomaly_count=5)
    self.assertEqual(list, type(data))
    self.assertEqual(31 * 24, len(data))
    values = [x[1] for x in data]
    anomalies = list(filter(lambda x: x < 0 or x > 99, values))
    self.assertEqual(5, len(anomalies),
                     "Encountered unexpected number of anomalies")
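# A minimal sketch of the generator interface that this test and
# test_get_random_data_range assume: DataGenerator(min_value, max_value)
# producing one (index, value) pair per hour, with `anomaly_count` values
# forced outside [min_value, max_value]. The tuple layout and uniform
# sampling here are assumptions, not the project's actual implementation.
import random

class DataGenerator:
    def __init__(self, min_value, max_value):
        self.min_value = min_value
        self.max_value = max_value

    def get_random_data(self, days, anomaly_count):
        total = days * 24  # one sample per hour
        # pick which hours carry out-of-range anomaly values
        anomaly_hours = set(random.sample(range(total), anomaly_count))
        data = []
        for hour in range(total):
            if hour in anomaly_hours:
                value = self.max_value + random.randint(1, 100)  # out of range
            else:
                value = random.randint(self.min_value, self.max_value)
            data.append((hour, value))
        return data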
def run(batch_id, source_file_name, output_file_name, reference_date=today):
    data_gen = DataGenerator()

    # load source file
    source_columns = ['External_Id__c', 'Name', 'UserRole.Name']
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.filter(lambda cv: 'RVP' not in cv['UserRole.Name'])

    data_gen.rename_column('External_Id__c', 'QuotaOwner_Id__c')
    data_gen.rename_column('Name', 'OwnerName__c')

    # generate id
    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Quota.' + str(data_gen.current_row + 1))

    data_gen.duplicate_rows(24)

    def quota_formula():
        # first month of quarter = 300k
        # second month of quarter = 750k
        # third month of quarter = 500k
        month_of_quarter = data_gen.current_row % 3
        if month_of_quarter == 0:
            return 300000
        elif month_of_quarter == 1:
            return 750000
        else:
            return 500000

    data_gen.add_formula_column('QuotaAmount__c', quota_formula)

    current_year = reference_date.year
    last_year = current_year - 1

    def start_date_formula():
        user_row = data_gen.current_row % 24
        month = str((user_row % 12) + 1).zfill(2)
        day = '01'
        if user_row < 12:
            year = str(last_year)
        else:
            year = str(current_year)
        return dateutil.parser.parse(year + '-' + month + '-' + day).date()

    data_gen.add_formula_column('StartDate__c', start_date_formula)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()
    data_gen.write(output_file_name, [
        'External_Id__c', 'QuotaOwner_Id__c', 'OwnerName__c', 'StartDate__c',
        'QuotaAmount__c'
    ])
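# Illustrative check of the row-index arithmetic above, written as standalone
# functions (hypothetical helpers; DataGenerator itself is not required).
# duplicate_rows(24) gives each quota owner 24 monthly rows: rows 0-11 map to
# last year's months and rows 12-23 to the current year's.
import dateutil.parser

def quota_for_row(row):
    # month position within the quarter: 0 -> 300k, 1 -> 750k, 2 -> 500k
    return {0: 300000, 1: 750000, 2: 500000}[row % 3]

def start_date_for_row(row, current_year):
    user_row = row % 24
    month = (user_row % 12) + 1
    year = current_year - 1 if user_row < 12 else current_year
    return dateutil.parser.parse('{}-{:02d}-01'.format(year, month)).date()

assert quota_for_row(0) == 300000 and quota_for_row(4) == 750000
assert start_date_for_row(13, 2020) == dateutil.parser.parse('2020-02-01').date()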
def train():
    model = Model(args)
    sampler = DataGenerator(args)
    all_val_loss = []
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        start = time.time()
        model_name = "./models/{}_{}_{}/{}_{}".format(args.model_type,
                                                      args.x_latent_size,
                                                      args.rnn_size,
                                                      args.model_type, 'init')
        model.restore(sess, model_name)
        for epoch in range(args.num_epochs):
            all_loss = []
            for batch_idx in range(int(sampler.total_traj_num / args.batch_size)):
                batch_data, batch_sd = sampler.next_batch(args.batch_size, sd=True)
                batch_s, batch_d = batch_sd
                feed = dict(zip(model.input_form, batch_data))
                feed[model.s_inputs] = batch_s
                feed[model.d_inputs] = batch_d
                rec_loss, cate_loss, latent_loss, _ = sess.run(
                    [model.rec_loss, model.cate_loss, model.latent_loss,
                     model.train_op], feed)
                sd_loss, _ = sess.run([model.sd_loss, model.sd_train_op], feed)
                all_loss.append([rec_loss, cate_loss, latent_loss, sd_loss])
            val_loss = compute_output(model.loss, sess, model, sampler,
                                      purpose="val", callback=np.mean)
            # if len(all_val_loss) > 0 and val_loss >= all_val_loss[-1]:
            #     print("Early termination with val loss: {}".format(val_loss))
            #     break
            all_val_loss.append(val_loss)
            end = time.time()
            print(f"epoch: {epoch}\tval loss: {val_loss}\t"
                  f"elapsed time: {end - start}\tloss: {np.mean(all_loss, axis=0)}")
            start = time.time()
            save_model_name = "./models/{}_{}_{}/{}_{}".format(
                args.model_type, args.x_latent_size, args.rnn_size,
                args.model_type, epoch)
            model.save(sess, save_model_name)
def test_model(self, num_tests=200):
    data_generator_test = DataGenerator(
        datasource="omniglot",
        num_classes=self.n_classes_per_task,
        num_samples_per_class=self.num_shot_train + self.num_shot_test,
        batch_size=1,
        test_set=True)
    batch_img, batch_label = data_generator_test.make_data_tensor(train=False)

    inner_input = tf.slice(
        batch_img, [0, 0, 0],
        [-1, self.n_classes_per_task * self.num_shot_train, -1])
    outer_input = tf.slice(
        batch_img, [0, self.n_classes_per_task * self.num_shot_train, 0],
        [-1, -1, -1])
    inner_label = tf.slice(
        batch_label, [0, 0, 0],
        [-1, self.n_classes_per_task * self.num_shot_train, -1])
    outer_label = tf.slice(
        batch_label, [0, self.n_classes_per_task * self.num_shot_train, 0],
        [-1, -1, -1])

    inner_input = tf.reshape(
        inner_input,
        [-1, self.n_classes_per_task * self.num_shot_train] + self.input_dim)
    outer_input = tf.reshape(
        outer_input,
        [-1, self.n_classes_per_task * self.num_shot_test] + self.input_dim)

    correct_prediction = 0
    tf.train.start_queue_runners(sess=self.sess)
    for ind in range(num_tests):
        print("Meta-Testing task {}".format(ind))
        if ind == 0:
            task_result = self.sess.run(
                self.meta_learn_task(
                    (inner_input[0], inner_label[0],
                     outer_input[0], outer_label[0]),
                    reuse=False, test=True))
        else:
            task_result = self.sess.run(
                self.meta_learn_task(
                    (inner_input[0], inner_label[0],
                     outer_input[0], outer_label[0]),
                    test=True))
        predictions, label = task_result["outputs"][-1]
        predictions = predictions.reshape(
            (self.num_shot_test * self.n_classes_per_task, ))
        # print(predictions)
        label = label.reshape((self.num_shot_test * self.n_classes_per_task, ))
        # print(label)
        for idx, prediction in enumerate(predictions):
            if prediction == label[idx]:
                correct_prediction += 1

    accuracy = float(
        correct_prediction /
        (num_tests * self.num_shot_test * self.n_classes_per_task)) * 100
    print("Model Accuracy : {} %".format(accuracy))
def inference_development_data_bottleneck_features(args):
    # Arguments & parameters
    workspace = args.workspace
    validate = args.validate
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    cuda = args.cuda

    batch_size = 64
    filename = 'main_pytorch'

    # Paths
    dev_hdf5_path = os.path.join(workspace, 'features', 'logmel',
                                 'development.h5')

    if validate:
        model_path = os.path.join(workspace, 'models', filename,
                                  'holdout_fold={}'.format(holdout_fold),
                                  'md_{}_iters.tar'.format(iteration))
        bottleneck_hdf5_path = os.path.join(
            workspace, 'bottlenecks', filename,
            'dev_holdout_fold={}'.format(holdout_fold),
            '{}_iters'.format(iteration), 'bottleneck.h5')
    else:
        model_path = os.path.join(workspace, 'models', filename, 'full_train',
                                  'md_{}_iters.tar'.format(iteration))
        bottleneck_hdf5_path = os.path.join(
            workspace, 'bottlenecks', filename, 'dev_full_train',
            '{}_iters'.format(iteration), 'bottleneck.h5')

    create_folder(os.path.dirname(bottleneck_hdf5_path))

    # Load model
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = DataGenerator(hdf5_path=dev_hdf5_path,
                              batch_size=batch_size,
                              validation_csv=None,
                              holdout_fold=None)

    generate_func = generator.generate_validate(data_type='train',
                                                shuffle=False,
                                                max_iteration=None)

    # Write bottleneck features
    write_bottleneck_features_to_hdf5(model, generate_func,
                                      bottleneck_hdf5_path, cuda,
                                      return_target=True)
def setUpClass(cls):
    # generate data
    dgen = DataGenerator()
    dgen.gen_data()
    cls.data = dgen.data
    cls.wn = dgen.wn
    cls.time = dgen.time
    # prevents plt.show() from blocking execution
    plt.ion()
def main():
    data_generator = DataGenerator(
        dataset_path='/Users/tomassykora/Projects/school/siamese-img-quality-assessment/live2')
    estimator = IQAEstimator(model=load_model('model.h5'),
                             test_images=data_generator.test_images)
    estimator.full_test_set_eval()
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    data_gen.load_source_file(source_file_name)

    data_gen.write(output_file_name, columns=[
        'External_ID__c',
        'Name',
    ])
def __init__(self, sess):
    """
    :param sess: the TensorFlow session
    """
    self.sess = sess
    self.config = GanConfig()
    self.model = GanModel(self.config)
    self.data = DataGenerator(self.config)
    self.trainer = GanTrainer(self.sess, self.model, self.data, self.config)
def build_generator(self):
    """Build a multi-thread train data generator."""
    self.train_generator = DataGenerator(args=self.args,
                                         dictionary=self.dictionary,
                                         n_classes=self.n_classes,
                                         feature_shapes=self.feature_shapes,
                                         n_anchors=self.n_anchors,
                                         shuffle=True)
def train(gpu_num=None, with_generator=False, show_info=True):
    print('network creating ... ', end='', flush=True)
    network = UNetPP(INPUT_IMAGE_SHAPE,
                     start_filter=START_FILTER,
                     depth=DEPTH,
                     class_num=CLASS_NUM)
    print('... created')

    if show_info:
        network.plot_model_summary('../model_plot.png')
        network.show_model_summary()

    if isinstance(gpu_num, int):
        model = network.get_parallel_model(gpu_num, with_comple=True)
    else:
        model = network.get_model(with_comple=True)

    model_filename = os.path.join(DIR_MODEL, File_MODEL)
    callbacks = [
        KC.TensorBoard(),
        HistoryCheckpoint(filepath='LearningCurve_{history}.png',
                          verbose=1,
                          period=10),
        KC.ModelCheckpoint(filepath=model_filename,
                           verbose=1,
                           save_weights_only=True,
                           # save_best_only=True,
                           period=10)
    ]

    print('data generator creating ... ', end='', flush=True)
    train_generator = DataGenerator(DIR_INPUTS, DIR_TEACHERS, INPUT_IMAGE_SHAPE)
    print('... created')

    if with_generator:
        train_data_num = train_generator.data_size()
        # valid_data_num = train_generator.valid_data_size()
        history = model.fit_generator(
            train_generator.generator(batch_size=BATCH_SIZE),
            steps_per_epoch=math.ceil(train_data_num / BATCH_SIZE),
            epochs=EPOCHS,
            verbose=1,
            use_multiprocessing=True,
            callbacks=callbacks
            # , validation_data=valid_generator
            # , validation_steps=math.ceil(valid_data_num / BATCH_SIZE)
        )
    else:
        print('data generating ... ')
        inputs, teachers = train_generator.generate_data()
        print('... generated')
        history = model.fit(inputs, teachers,
                            batch_size=BATCH_SIZE,
                            epochs=EPOCHS,
                            shuffle=True,
                            verbose=1,
                            callbacks=callbacks)

    print('model saving ... ', end='', flush=True)
    model.save_weights(model_filename)
    print('... saved')

    print('learning curve saving ... ', end='', flush=True)
    save_learning_curve(history)
    print('... saved')
def vae_dev(nobin_training_data, intermediate_dim, latent_dim, latent_fac,
            epochs, batch_size=1):
    n_neuron = nobin_training_data[0].shape[-1]
    training_generator = DataGenerator(nobin_training_data,
                                       nobin_training_data,
                                       batch_size=batch_size)
    vae, _, vae_encoder2 = create_lstm_vae(input_dim=n_neuron,
                                           timesteps=None,
                                           intermediate_dim=intermediate_dim,
                                           latent_dim=latent_dim,
                                           latent_fac=latent_fac,
                                           epsilon_std=1.)
    vae.fit_generator(generator=training_generator, epochs=epochs, verbose=0)

    reconstruct_train = []
    latent_trajectory = []
    for i in range(len(nobin_training_data)):
        shape1, shape2 = nobin_training_data[i].shape
        reconstruct_train.append(
            vae.predict(nobin_training_data[i].reshape(1, shape1, shape2),
                        verbose=0))
        latent_trajectory.append(
            vae_encoder2.predict(nobin_training_data[i].reshape(1, shape1, shape2),
                                 verbose=0))

    def list2array(l, reshape=False):
        a = l[0]
        if reshape:
            _, shape1, shape2 = l[0].shape
            a = a.reshape(shape1, shape2)
        for i in range(1, len(l)):
            if reshape:
                _, shape1, shape2 = l[i].shape
                a_i = l[i].reshape(shape1, shape2)
            else:
                a_i = l[i]
            a = np.vstack((a, a_i))
        return a

    nobin_training_data = list2array(nobin_training_data, reshape=False)
    reconstruct_train = list2array(reconstruct_train, reshape=True)
    latent_trajectory = list2array(latent_trajectory, reshape=True)

    pca = PCA(n_components=latent_fac)
    pca.fit(latent_trajectory)
    evr = pca.explained_variance_ratio_
    dev, _, _ = deviance(reconstruct_train, nobin_training_data, 'poisson')
    return dev, evr
def main():
    tf.set_random_seed(1)
    np.random.seed(1)
    random.seed(1)

    # Test environment
    # if not FLAGS.train:
    #     if 'reach' in FLAGS.experiment:
    #         env = gym.make('ReacherMILTest-v1')
    #         ob = env.reset()
    #         import pdb; pdb.set_trace()

    graph = tf.Graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(graph=graph, config=config)

    # Create objects
    if flag_train:
        data_generator = DataGenerator(update_batch_size, test_batch_size,
                                       meta_batch_size)
    model = MIL(num_update)

    # put here for now
    if flag_train:
        if use_meta_learning:
            model.init_network(graph)
        else:
            model.init_network(graph, meta_learning='False')
    else:
        model.init_network(graph, prefix='Testing', meta_learning='False')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        init_op = tf.global_variables_initializer()
        sess.run(init_op, feed_dict=None)
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if not flag_train:
        # Restore model from file
        with graph.as_default():
            try:
                saver.restore(sess, log_dir)
                print('load')
            except Exception:
                init_op = tf.global_variables_initializer()
                sess.run(init_op, feed_dict=None)

    if flag_train:
        train(graph, model, saver, sess, data_generator, meta=use_meta_learning)
    else:
        test_gazebo(graph, model, saver, sess, meta=use_meta_learning)
def train_gen(self, training_infos, validation_infos, save_weights_path,
              batch_size, nb_epochs, learning_rate):
    # create data generators
    params = {
        'batch_size': batch_size,
        'shuffle': True,
        'X_shape': self.input_size,
        'y_shape': self.grid_size * self.grid_size *
                   (self.bbox_params + len(self.classes)),
        'grid_size': self.grid_size,
        'class_count': len(self.classes)
    }
    training_generator = DataGenerator(data_list=training_infos, **params)
    valid_generator = DataGenerator(data_list=validation_infos, **params)

    checkpoint = ModelCheckpoint(save_weights_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=False,
                                 save_weights_only=True,
                                 mode='min',
                                 period=1)
    callbacks_list = [checkpoint]

    # TODO: a lot of parameters... it may be a good idea to tune them
    # optimizer = optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    optimizer = optimizers.SGD(lr=1e-16, decay=1e-6, nesterov=True)
    self.model.compile(loss=self.custom_loss, optimizer=optimizer)

    self.model.fit_generator(generator=training_generator,
                             validation_data=valid_generator,
                             epochs=nb_epochs,
                             callbacks=callbacks_list)
def inference_validation(args):
    # Arguments & parameters
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    iteration = args.iteration
    filename = args.filename
    cuda = args.cuda

    validate = True
    batch_size = 64

    # Paths
    hdf5_path = os.path.join(workspace, 'features', 'logmel', 'development.h5')
    validation_csv = os.path.join(workspace, 'validation.csv')
    model_path = os.path.join(workspace, 'models', filename,
                              'holdout_fold{}'.format(holdout_fold),
                              'md_{}_iters.tar'.format(iteration))

    # Load model
    model = Model()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if cuda:
        model.cuda()

    # Data generator
    generator = DataGenerator(hdf5_path=hdf5_path,
                              batch_size=batch_size,
                              validation_csv=validation_csv,
                              holdout_fold=holdout_fold)

    generate_func = generator.generate_validate(data_type='validate',
                                                shuffle=False,
                                                max_iteration=None)

    # Inference
    results = forward(model=model,
                      generate_func=generate_func,
                      cuda=cuda,
                      return_target=True,
                      return_bottleneck=False)

    outputs = results['output']
    targets = results['target']
    itemids = results['itemid']

    # Evaluate
    va_acc = calculate_accuracy(targets, outputs)
    va_auc = calculate_auc(targets, outputs)
    logging.info('va_acc: {:.3f}, va_auc: {:.3f}'.format(va_acc, va_auc))
def run(batch_id, source_file_name, output_file_name):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'KnowledgeArticle.External_Id__c', 'User.External_Id__c',
        'CreatedDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('KnowledgeArticle.External_Id__c',
                           'KCSArticle__ka.External_Id__c')
    data_gen.rename_column('User.External_Id__c', 'Owner.External_Id__c')

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_KCSArticleVersion.' + str(data_gen.current_row + 1))
    data_gen.add_formula_column('ArticleNumber__c',
                                lambda: data_gen.current_row + 1)
    data_gen.add_formula_column('PublishStatus__c', ['Archived', 'Online'])
    data_gen.add_constant_column('IsLatestVersion__c', 'true')
    data_gen.add_constant_column('IsVisibleInApp__c', 'true')
    data_gen.add_constant_column('IsVisibleInCsp__c', 'true')
    data_gen.add_constant_column('IsVisibleInPkb__c', 'true')
    data_gen.add_constant_column('IsVisibleInPrm__c', 'true')
    data_gen.add_constant_column('VersionNumber__c', '1')
    data_gen.add_constant_column('Language__c', 'en_US')

    titles = [
        "Health", "Computers", "Music", "Tools", "Home", "Outdoors",
        "Jewelery", "Toys", "Grocery", "Clothing", "Games", "Automotive",
        "Beauty", "Garden", "Books", "Industrial", "Baby", "Kids", "Movies",
        "Sports", "Shoes", "Electronics"
    ]
    data_gen.add_formula_column('Title__c', titles)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write file
    data_gen.apply_transformations()

    output_columns = [
        'External_Id__c', 'ArticleNumber__c', 'CreatedDate__c',
        'Owner.External_Id__c', 'PublishStatus__c', 'IsLatestVersion__c',
        'IsVisibleInApp__c', 'IsVisibleInCsp__c', 'IsVisibleInPkb__c',
        'IsVisibleInPrm__c', 'KCSArticle__ka.External_Id__c', 'Title__c',
        'VersionNumber__c', 'Language__c', 'analyticsdemo_batch_id__c'
    ]
    data_gen.write(output_file_name, output_columns)
def predict(self):
    """
    Run prediction for every sub-folder.
    """
    # Initialize model
    self.get_model_by_name(self.architecture)

    # Propagate configuration parameters.
    self.model.set_batch_size(self.batch_size)

    # Construct and compile the model.
    self.model.construct(self.tile_size, self.tile_size,
                         len(self.features), len(self.classes))
    self.model.compile()

    # Load model weights.
    self.model.load_weights(self.weights_path)

    # Go through all folders
    date_match = self.product_name.rsplit('_', 1)[-1]
    index_match = self.product_name.rsplit('_', 1)[0].rsplit('_', 1)[-1]

    tile_paths = []
    # Look for the .nc file, as the name is not specified
    for subfolder in os.listdir(self.product_cvat):
        subfolder_path = os.path.join(self.product_cvat, subfolder)
        if os.path.isdir(subfolder_path):
            for file in os.listdir(subfolder_path):
                if file.endswith(".nc"):
                    tile_paths.append(os.path.join(subfolder_path, file))

    # Initialize data generator
    self.params = {'path_input': self.product_cvat,
                   'batch_size': self.batch_size,
                   'features': self.features,
                   'tile_size': self.tile_size,
                   'num_classes': len(self.classes),
                   'product_level': self.product,
                   'shuffle': False}
    predict_generator = DataGenerator(tile_paths, **self.params)

    # A sub_batch size of 1 means the data is processed as a whole,
    # 2 divides it in half, etc.
    # set_normalization(predict_generator, tile_paths, 1)

    # Run prediction
    predictions = self.model.predict(predict_generator)

    # sen2cor = predict_generator.get_sen2cor()
    # mask = (sen2cor[:, :, :, 3] == 1)
    # prediction_union = predictions
    # prediction_union[mask, 3] = sen2cor[mask, 3]

    y_pred = np.argmax(predictions, axis=3)
    for i, prediction in enumerate(predictions):
        save_masks_contrast(tile_paths[i], prediction, y_pred[i],
                            self.prediction_product_path, self.classes)
def run(batch_id, source_file_name, output_file_name,
        reference_datetime=today, id_offset=0):
    data_gen = DataGenerator()

    # load source file
    source_columns = [
        'External_Id__c', 'Owner.External_Id__c', 'CreatedDate__c',
        'LastActivityDate__c'
    ]
    data_gen.load_source_file(source_file_name, source_columns)

    data_gen.rename_column('External_Id__c', 'What.External_Id__c')
    data_gen.rename_column('LastActivityDate__c', 'ActivityDate')

    # generate a random number of tasks per opportunity
    data_gen.duplicate_rows(duplication_factor=lambda: randint(1, 3))

    data_gen.add_formula_column(
        'External_Id__c',
        formula=lambda: 'W_Task.' + str(id_offset + data_gen.current_row + 1))
    data_gen.add_formula_column('TaskSubtype', formula=task.oppty_task_subtype)
    data_gen.add_formula_column('CallDurationInSeconds',
                                formula=task.task_call_duration)
    data_gen.add_formula_column('CallDisposition',
                                formula=task.task_call_disposition)
    data_gen.add_formula_column('CallType', formula=task.task_call_type)
    data_gen.add_formula_column('Status', formula=task.task_status)
    data_gen.add_formula_column('Priority', formula=task.task_priority)

    def create_date_formula(column_values):
        oppty_create_date = dateutil.parser.parse(column_values['CreatedDate__c'])
        oppty_last_activity_date = dateutil.parser.parse(column_values['ActivityDate'])
        create_date = fake.date_time_between_dates(oppty_create_date,
                                                   oppty_last_activity_date)
        if create_date > reference_datetime:
            create_date = reference_datetime
        return create_date.isoformat(sep=' ')

    data_gen.add_formula_column('CreatedDate__c', create_date_formula)

    def activity_date_formula(column_values):
        create_date = dateutil.parser.parse(column_values['CreatedDate__c']).date()
        return (create_date + timedelta(days=randint(0, 14))).isoformat()

    data_gen.add_formula_column('ActivityDate', activity_date_formula)

    data_gen.add_formula_column('Subject', formula=task.task_subject)

    # add a UUID for each row that is created in this batch
    data_gen.add_constant_column('analyticsdemo_batch_id__c', batch_id)

    # apply transformations and write
    data_gen.apply_transformations()
    data_gen.write(output_file_name)
def main():
    with open("./data/train_data", "rb") as read_f:
        train_generator = pickle.load(read_f)
    with open("./data/emb", "rb") as read_f:
        embedding_matrix, _, _ = pickle.load(read_f)

    test_generator = DataGenerator("test", args.batch_size)

    model = Network(args.embedding_size, args.embedding_dimension,
                    embedding_matrix, args.hidden_dimension).cuda()
    best_model = Network(args.embedding_size, args.embedding_dimension,
                         embedding_matrix, args.hidden_dimension).cuda()
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.l2_reg)

    best_result = 0.0
    for echo in range(args.epoch_num):
        info = "[" + echo * ">" + " " * (args.epoch_num - echo) + "]"
        sys.stderr.write(info + "\r")

        cost1, cost2, cost, total_num = 0.0, 0.0, 0.0, 0
        for data in train_generator.generate_data(shuffle=True):
            zp_rep, npc_rep, np_rep, feature = model.forward(data, dropout=args.dropout)
            output = model.generate_score(zp_rep, npc_rep, np_rep, feature)
            optimizer.zero_grad()

            # hinge-style margins on wrong-vs-correct scores and score bounds
            dis1 = output[data['wid']] - output[data['cid']] + args.margin
            dis2 = output[data['uwid']] - args.wrong_bound
            dis3 = args.correct_bound - output[data['ucid']]
            triplet_loss = (torch.sum(dis1 * (dis1 > 0).cuda().float()) +
                            torch.sum(dis2 * (dis2 > 0).cuda().float()) +
                            torch.sum(dis3 * (dis3 > 0).cuda().float()))
            cos_sim_sum = torch.sum(
                1 - F.cosine_similarity(np_rep[data['cid1']], np_rep[data['cid2']]))
            sim_w = 0.5
            num = data["result"].shape[0]
            total_loss = triplet_loss + sim_w * cos_sim_sum
            total_loss.backward()

            cost += total_loss.item() * num
            cost1 += triplet_loss.item() * num
            cost2 += cos_sim_sum.item() * num
            total_num += num
            optimizer.step()

        train_re = evaluate_train(train_generator, model)
        dev_re, dev_cost = evaluate_dev(train_generator, model, args.margin)
        if dev_re > best_result:
            best_result = dev_re
            net_copy(best_model, model)
        test_re = evaluate_test(test_generator, model)
        print('Epoch %s; Train Cost: %.4f, %.4f, %.4f; Train Result: %.4f; '
              'Dev Result: %.4f, %.4f; Test Result: %.4f' % (
                  echo, cost / total_num, cost1 / total_num, cost2 / total_num,
                  train_re, dev_re, dev_cost, test_re))
        print(file=sys.stderr)

    torch.save(best_model, "./models/model")
    re = evaluate_test(test_generator, best_model)
    print("Performance on Test: F", re)
def thread_job(functionid, iterations_per_thread, events_per_iteration,
               use_lambda, context, sleep_duration, event_type,
               sensitivity_type):
    # These modules are not available in a Lambda environment,
    # so they are imported here.
    from data_generator import DataGenerator
    import payload
    import lambda_fifo_message_producer

    p_lambda = Lambda({})
    data_generator = DataGenerator(context)
    # data = data_generator.csv(events_per_iteration, event_type)

    for t in range(0, iterations_per_thread):
        if sensitivity_type is None:
            sensitivity_type = random.choice([
                sensitivity.SENSITIVITY_TYPE.NONE,
                sensitivity.SENSITIVITY_TYPE.ENCRYPT
            ])
        compression_mode = random.choice([
            compression.COMPRESSION_MODE.NONE,
            compression.COMPRESSION_MODE.COMPRESS
        ])
        payload_type = random.choice(
            [payload.PAYLOAD_TYPE.CSV, payload.PAYLOAD_TYPE.JSON])

        data = None
        # TODO: test data generation should be moved to the payload subclass
        if payload_type == payload.PAYLOAD_TYPE.JSON:
            data = data_generator.json(events_per_iteration, event_type)
        else:
            data = data_generator.csv(events_per_iteration, event_type)

        if os.environ[c.ENV_VERBOSE]:
            print("Data: \t{}".format(os.environ[c.ENV_VERBOSE]))

        if use_lambda:
            response = message(
                type('obj', (object, ),
                     {'event': {c.API_PARAM_SOURCE_IP: '127.0.0.1'}}),
                compression_mode, sensitivity_type, payload_type,
                {c.API_PARAM_DATA: data})
        else:
            payload_data = {
                c.API_PARAM_SOURCE_IP: '127.0.0.1',
                c.SQS_PARAM_SENSITIVITY_TYPE: sensitivity_type,
                c.SQS_PARAM_PAYLOAD_TYPE: payload_type,
                c.SQS_PARAM_COMPRESSION_TYPE: compression_mode,
                c.API_PARAM_PAYLOAD: {c.API_PARAM_DATA: data}
            }
            response = lambda_fifo_message_producer.main(
                payload_data, type('obj', (object, ), {}))

        print("StatusCode: {}".format(response['StatusCode']))
        time.sleep(sleep_duration)
def initialize(self, height, width, n_tiles):
    Game.__init__(self)
    self.height = height
    self.width = width
    self.n_tiles = n_tiles
    dg = DataGenerator()
    self.tiles = dg.gen_matrix_instance(n_tiles, width, height)
    print(self.tiles)
    self._base_board = Board(height, width, self.tiles)
def test_initialize_state_3(self):
    w = 3
    h = 3
    dg = DataGenerator(w, h)
    prediction = np.array([[10, 10, 0.6],
                           [10, 10, 0.8],
                           [1.8, 1.9, 0.71]])
    grid = np.array([[1, 1, 0],
                     [1, 1, 0],
                     [0, 0, 0]])
    tiles = [(1, 1), (1, 1), (2, 1), (1, 2), (3, 1), (1, 3)]
    ret = get_best_tile_by_prediction(grid, tiles, prediction, dg)
    self.assertEqual(ret, [(3, 1), (0, 2)])
def main():
    options = Options()
    options.make()
    dsn = build_dsn(options)
    connection = psycopg2.connect(dsn)
    connection.autocommit = True
    cursor = connection.cursor()
    schema_generator = SchemaGenerator(cursor)
    schema = schema_generator.generate_schema()
    print(schema)
    data_generator = DataGenerator(cursor, schema_generator, options)
    data_generator.generate_data()