def load_ztf_data(head_path=None, phot_path=None, default_train_set=False):
    """
    Load ZTF data. The way this file is created, separators (-777) are already dropped.

    :param default_train_set: used only if the head file path or the phot file path is left as None.
        If True, it loads the default train set; otherwise it loads the default test set.
    :param head_path: path to head file
    :param phot_path: path to phot file
    :return: object of the Data class
    """
    if (head_path is None) or (phot_path is None):
        if default_train_set:
            head_path = '/media/biswajit/drive/Kilonova_datasets/ZTF_20190512/train_HEAD.FITS'
            phot_path = '/media/biswajit/drive/Kilonova_datasets/ZTF_20190512/train_PHOT.FITS'
        else:
            head_path = '/media/biswajit/drive/Kilonova_datasets/ZTF_20190512/test_HEAD.FITS'
            phot_path = '/media/biswajit/drive/Kilonova_datasets/ZTF_20190512/test_PHOT.FITS'

    df_header = Table.read(head_path, format='fits')
    df_phot = Table.read(phot_path, format='fits')
    data_ob = Data(df_metadata=df_header, df_data=df_phot,
                   object_id_col_name='SNID',
                   time_col_name='MJD',
                   target_col_name='SNTYPE',
                   band_col_name='FLT',
                   brightness_col_name='FLUXCAL',
                   brightness_err_col_name='FLUXCALERR',
                   bands=['g', 'r'])
    return data_ob
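# Hedged usage sketch (not from the source): the default FITS paths above are
# machine-specific, so a caller would typically pass its own file locations.
# The paths below and the df_metadata attribute access are assumptions.
ztf_data = load_ztf_data(head_path='data/ZTF/test_HEAD.FITS',
                         phot_path='data/ZTF/test_PHOT.FITS')
print(len(ztf_data.df_metadata), "light-curve headers loaded in bands ['g', 'r']")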
def __init__(self, config, logger, metadata):
    super(Task, self).__init__(config, logger, metadata)
    self.sum = 0
    self.model = Model(config.get_with_prefix("model"))
    self.data = Data(config.get_with_prefix("data"))
    self.trainer = Trainer(config.get_with_prefix("trainer"), self.model, self.data)
    self.best_val_acc = 0
    self.number_worse_iterations = 0
def main(args):
    file = 'examples/vectors.csv'
    if args[1] == 'g':
        if len(args) == 4:
            nrows = int(args[2])
            ncols = int(args[3])
        else:
            nrows = 501
            ncols = 11
        config = Config(
            file=file,
            nrows=nrows,
            ncols=ncols,
        )
        Data(config)
    elif args[1] == 'a':
        data = Data(file=file)
        ed = EuclideanDistance(data)
        with open('examples/summary.txt', 'w+') as handle:
            handle.write('min:' + str(ed.min) + '\n')
            handle.write('max:' + str(ed.max) + '\n')
            handle.write('xlabels: ' + str(ed._get_xlabels(Decimal('0.1'))) + '\n')
        ed.histogram('examples/hist.png')
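# Hypothetical entry point (not shown in the source): main() reads argv-style
# arguments, so a driver might call it roughly like this. The script name in the
# example commands is a placeholder.
import sys

if __name__ == '__main__':
    # "python vectors_tool.py g 501 11" generates examples/vectors.csv,
    # "python vectors_tool.py a" analyses it and writes summary.txt and hist.png
    main(sys.argv)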
def get_data_title():
    path = Get_path.real_title()
    data_r = Filehandler.get_text(path)
    path = Get_path.real_title_2()
    data_r += Filehandler.get_text(path)
    path = Get_path.fake_title()
    data_f = Filehandler.get_text(path)
    path = Get_path.fake_title_2()
    data_f += Filehandler.get_text(path)
    data = Data(data_r, data_f)
    return data
def load_RESSPECT_data(
        phot_df_file_path="/media/biswajit/drive/Kilonova_datasets/RESSPECT"
                          "/RESSPECT_PERFECT_LIGHTCURVE.csv",
        meta_df_file_path="/media/biswajit/drive/Kilonova_datasets/RESSPECT/RESSPECT_PERFECT_HEAD.csv"):
    """
    Load RESSPECT simulations for generating PCs.

    :param phot_df_file_path: path to data file
    :param meta_df_file_path: path to header file
    :return: object of the Data class
    """
    df_meta_data = Table.read(meta_df_file_path, delimiter=",")
    df_data = Table.read(phot_df_file_path)
    data_ob = Data(df_metadata=df_meta_data, df_data=df_data,
                   object_id_col_name='SNID',
                   time_col_name='MJD',
                   band_col_name='FLT',
                   brightness_col_name='FLUXCAL',
                   brightness_err_col_name='FLUXCALERR',
                   bands=['u', 'g', 'r', 'i', 'z', 'Y'],
                   target_col_name='type')
    return data_ob
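# Hedged usage sketch (not from the source): overriding the hard-coded default
# RESSPECT paths. The paths below are placeholders for local copies of the files.
resspect_data = load_RESSPECT_data(
    phot_df_file_path='data/RESSPECT/RESSPECT_PERFECT_LIGHTCURVE.csv',
    meta_df_file_path='data/RESSPECT/RESSPECT_PERFECT_HEAD.csv')
print("loaded RESSPECT light curves in bands", ['u', 'g', 'r', 'i', 'z', 'Y'])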
parser = argparse.ArgumentParser(description='arguments input')
parser.add_argument('-d', '--data',
                    type=str,
                    help='Training data folder with subfolders: DSM, PAN, LABEL',
                    required=True)
args = parser.parse_args()

img_height = 256
img_width = 256
data_folder = args.data
data = Data(data_folder)

# split to train, validation, test
dsm_train, pan_train, label_train, dsm_vld, pan_vld, label_vld, dsm_tst, pan_tst, label_tst = data.split_trn_vld_tst()

# create two generators: for training and for validation
train_gen = DataGenerator(dsm_train, pan_train, label_train,
                          pred_fn=None, batch_size=8, shuffle=True)
valid_gen = DataGenerator(dsm_vld, pan_vld, label_vld,
def backward():
    yolo = YOLO(config.class_num, config.anchors,
                width=config.width, height=config.height)
    data = Data(config.train_file, config.class_num, config.batch_size,
                config.anchors, config.multi_scale_img,
                width=config.width, height=config.height)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32,
                                      shape=[config.batch_size, None, None, 3])
    y1_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])
    y2_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])
    y3_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])

    feature_y1, feature_y2, feature_y3 = yolo.forward(
        inputs, weight_decay=config.weight_decay, isTrain=True)
    global_step = tf.Variable(0, trainable=False)

    # yolov4 loss
    loss = yolo.get_loss_v4(feature_y1, feature_y2, feature_y3,
                            y1_true, y2_true, y3_true,
                            config.cls_normalizer, config.ignore_thresh,
                            config.prob_thresh, config.score_thresh)
    l2_loss = tf.compat.v1.losses.get_regularization_loss()

    epoch = compute_curr_epoch(global_step, config.batch_size,
                               len(data.imgs_path))
    lr = config_lr(config.lr_type, config.lr_init, epoch)
    optimizer = config_optimizer(config.optimizer_type, lr, config.momentum)

    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(loss + l2_loss)
        clip_grad_var = [
            gv if gv[0] is None else [tf.clip_by_norm(gv[0], 100.), gv[1]]
            for gv in gvs
        ]
        train_step = optimizer.apply_gradients(clip_grad_var,
                                               global_step=global_step)

    # initialization
    init = tf.compat.v1.global_variables_initializer()
    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session() as sess:
        sess.run(init)
        step = 0
        ckpt = tf.compat.v1.train.get_checkpoint_state(config.model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            step = int(step)
            Log.add_log("message: found ckpt model, global_step=" + str(step))
        else:
            Log.add_log("message: no ckpt model found")

        # total number of training steps
        total_steps = np.ceil(config.total_epoch * len(data.imgs_path) /
                              config.batch_size)
        while step < total_steps:
            start = time.perf_counter()
            batch_img, y1, y2, y3 = next(data)
            _, loss_, step, lr_ = sess.run([train_step, loss, global_step, lr],
                                           feed_dict={
                                               inputs: batch_img,
                                               y1_true: y1,
                                               y2_true: y2,
                                               y3_true: y3
                                           })
            end = time.perf_counter()
            print(
                "step: %6d, loss: %.5g\t, w: %3d, h: %3d, lr:%.5g\t, time: %5f s"
                % (step, loss_, data.width, data.height, lr_, end - start))

            if step % 5 == 2:
                Log.add_loss(str(step) + "\t" + str(loss_))

            if (step + 1) % config.save_step == 0:
                Log.add_log("message: saving current model, step=" + str(step) +
                            ", lr=" + str(lr_))
                saver.save(sess,
                           path.join(config.model_path, config.model_name),
                           global_step=0)

        Log.add_log("message: training finished, saving model, step=" + str(step))
        saver.save(sess, path.join(config.model_path, config.model_name),
                   global_step=step)
    return 0
def backward():
    yolo = YOLO()
    data = Data(train_file, class_num, batch_size, anchors,
                width=width, height=height, data_debug=data_debug)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32,
                                      shape=[batch_size, None, None, 3])
    y1_true = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[batch_size, None, None, 3, 4 + 1 + class_num])
    y2_true = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[batch_size, None, None, 3, 4 + 1 + class_num])
    y3_true = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[batch_size, None, None, 3, 4 + 1 + class_num])

    feature_y1, feature_y2, feature_y3 = yolo.forward(
        inputs, class_num=class_num, weight_decay=weight_decay, isTrain=True)
    global_step = tf.Variable(0, trainable=False)

    # loss value of yolov4
    loss = Loss().yolo_loss([feature_y1, feature_y2, feature_y3],
                            [y1_true, y2_true, y3_true],
                            [anchors[2], anchors[1], anchors[0]],
                            width, height, class_num,
                            cls_normalizer=cls_normalizer,
                            iou_normalizer=iou_normalizer,
                            iou_thresh=iou_thresh,
                            prob_thresh=prob_thresh,
                            score_thresh=score_thresh)
    l2_loss = tf.compat.v1.losses.get_regularization_loss()

    epoch = compute_curr_epoch(global_step, batch_size, len(data.imgs_path))
    lr = Lr.config_lr(lr_type, lr_init, lr_lower=lr_lower,
                      piecewise_boundaries=piecewise_boundaries,
                      piecewise_values=piecewise_values, epoch=epoch)
    optimizer = Optimizer.config_optimizer(optimizer_type, lr, momentum)

    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(loss + l2_loss)
        clip_grad_var = [
            gv if gv[0] is None else [tf.clip_by_norm(gv[0], 100.), gv[1]]
            for gv in gvs
        ]
        train_step = optimizer.apply_gradients(clip_grad_var,
                                               global_step=global_step)

    # initialize
    init = tf.compat.v1.global_variables_initializer()
    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session() as sess:
        sess.run(init)
        step = 0
        ckpt = tf.compat.v1.train.get_checkpoint_state(model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            step = int(step)
            Log.add_log("message: load ckpt model, global_step=" + str(step))
        else:
            Log.add_log("message: can not find ckpt model")

        curr_epoch = step // data.steps_per_epoch
        while curr_epoch < total_epoch:
            for _ in range(data.steps_per_epoch):
                start = time.perf_counter()
                batch_img, y1, y2, y3 = next(data)
                _, loss_, step, lr_ = sess.run(
                    [train_step, loss, global_step, lr],
                    feed_dict={
                        inputs: batch_img,
                        y1_true: y1,
                        y2_true: y2,
                        y3_true: y3
                    })
                end = time.perf_counter()
                if step % 5 == 2:
                    print(
                        "step: %6d, epoch:%3d, loss: %.5g\t, wh: %3d, lr:%.5g\t, time: %5f s"
                        % (step, curr_epoch, loss_, width, lr_, end - start))
                    Log.add_loss(str(step) + "\t" + str(loss_))

                if (loss_ > 1e3) and (step > 1e3):
                    Log.add_log("error: loss exception, loss_value = " + str(loss_))
                    ''' break the process or lower learning rate '''
                    raise ValueError("error: loss exception, loss_value = " +
                                     str(loss_) +
                                     ", please lower your learning rate")
                    # lr = tf.math.maximum(tf.math.divide(lr, 10), config.lr_lower)

            curr_epoch += 1
            if curr_epoch % save_per_epoch == 0:
                # save ckpt model
                Log.add_log("message: save ckpt model, step=" + str(step) +
                            ", lr=" + str(lr_))
                saver.save(sess, path.join(model_path, model_name),
                           global_step=step)

        # save final ckpt model
        Log.add_log("message: save final ckpt model, step=" + str(step))
        saver.save(sess, path.join(model_path, model_name), global_step=step)
    return 0
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from pylab import *
from random import randint, randrange
from src.Data import Data
import numpy as np

data_size = int(input("Enter the size of your population: "))
duration = int(input("Enter the duration of the study: "))

# get data generated randomly (1% of people are infected as initial cases)
dataObject = Data(data_size)
# get people list
data = dataObject.data

# per-day statistics table { 0: 'S', 1: 'I', 2: 'D', 3: 'G' }
statistiques = {(i, j): 0 for i in range(duration) for j in range(4)}
# infection duration for each infected person
jourInfectee = [0 for _ in range(data_size)]

degrees = []
graph_transitivity = []

fig1 = plt.figure()
G = nx.Graph()
G.add_nodes_from(range(data_size))


def setJourStatistics(i):
    if i == 0:
        statistiques[0, 0] = 0
def setUp(self) -> None:
    self._data = Data(file='tests/test_vectors.csv')
    self._euclidean_distance = EuclideanDistance(self._data)
def test_if_numbers_are_valid(self):
    data = Data(TestData.config)
    # global minimum and maximum across all columns
    mn = data.data_frame[data.data_frame.columns].min().min()
    mx = data.data_frame[data.data_frame.columns].max().max()
    self.assertTrue(mn > TestData.min_border and mx < TestData.max_border)
@classmethod
def setUpClass(cls) -> None:
    Data(TestData.config)
import tensorflow as tf
import argparse

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('task_path')
args = parser.parse_args()

api = taskplan.Api()
task = api.load_task(args.task_path)
config = task.config
path = task.build_save_dir()

model = Model(config.get_with_prefix("model"))
model.load_weights(str(path / Path("model.h5py")))
data = Data(config.get_with_prefix("data"))

acc = tf.keras.metrics.SparseCategoricalAccuracy()
for batch in data.build_test_dataset():
    images, labels = batch
    pred = model(images)
    acc(labels, pred)

print("Acc: " + str(acc.result()))

tensorboard_writer = tf.summary.create_file_writer(str(path))
with tensorboard_writer.as_default():
    tf.summary.scalar('test/acc', acc.result(), step=task.finished_iterations)
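# Hypothetical invocation sketch (script name is a placeholder, not from the source):
#   python evaluate.py experiments/my_task
# taskplan loads the task config, the weights are restored from <save_dir>/model.h5py,
# and the resulting test accuracy is logged to the task's TensorBoard directory.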
# path_test_data = '../../sighan2005/%s_test_processed.utf8' % dataSet
# path_dev_data = None
# if(dataSet == 'ctb6'):path_dev_data = '../../sighan2005/%s_dev_processed.utf8' % dataSet
# except:
path_lookup_table = '../PreTrainedWordEmbedding/charactor_OOVthr_50_10v.txt'
path_train_data = '../sighan2005/origin/%s_train_column.tsv' % dataSet
path_test_data = '../sighan2005/origin/%s_test_column.tsv' % dataSet
path_dev_data = None
if dataSet == 'ctb6':
    path_dev_data = '../../sighan2005/%s_dev_processed.utf8' % dataSet
flag_random_lookup_table = False
dic_label = {'B': 1, 'E': 2, 'I': 3, 'S': 0}
# pdb.set_trace()
data = Data(path_lookup_table=path_lookup_table, wordVecLen=wordVecLen,
            path_train_data=path_train_data, path_test_data=path_test_data,
            path_dev_data=path_dev_data,
            flag_random_lookup_table=flag_random_lookup_table,
            dic_label=dic_label, use_bigram_feature=use_bigram_feature,
            random_seed=random_seed, flag_toy_data=flag_toy_data)
seg_result_file = 'seg_result/seg_result_%s' % dataSet
cws = CWS(alpha=alpha,
          squared_filter_length_limit=squared_filter_length_limit,
          batch_size=batch_size,
          n_epochs=n_epochs,
          seg_result_file=seg_result_file,
          L2_reg=L2_reg,
          HINGE_reg=HINGE_reg,
          wordVecLen=wordVecLen,
          preWindowSize=preWindowSize,
          surWindowSize=surWindowSize,
          flag_dropout=flag_dropout,
def main():
    embedding_files = config.get('embedding_files')
    dev_size = config.get('dev_size')
    # dev_size = 50000

    data = Data()
    data.load(dev_size)
    data.split()

    nlp = spacy.load('en', disable=['parser', 'tagger', 'ner'])

    ci_path = '/home/matt/ci.p'
    if os.path.isfile(ci_path):
        corpus_info = pickle.load(open(ci_path, 'rb'))
    else:
        corpus_info = CorpusInfo(data.get_questions(subset='train'), nlp)
        pickle.dump(corpus_info, open(ci_path, 'wb'))
    word_counts = corpus_info.word_counts
    char_counts = corpus_info.char_counts

    text_mapper = TextMapper(word_counts=word_counts, char_counts=char_counts,
                             word_threshold=5, max_word_len=12,
                             char_threshold=350, max_sent_len=70, nlp=nlp,
                             word_lowercase=True, char_lowercase=True)
    word_vocab = text_mapper.get_words_vocab()

    emb_path = '/home/matt/emb.p'
    if os.path.isfile(emb_path):
        embeddings = pickle.load(open(emb_path, 'rb'))
    else:
        embeddings = load_embeddings(word_vocab, embedding_files)
        pickle.dump(embeddings, open(emb_path, 'wb'))

    unknown_word_models = [
        UnknownWords(text_mapper, embedding) for embedding in embeddings
    ]
    for model in unknown_word_models:
        model.define_model()
        model.fit(data.train_qs)
        model.improve_embedding()

    # save_unknown_words(data, embeddings, max_words=200)
    # models_all = list()
    # for model in config.get('models'):
    #     model_class = globals()[model.get('class')]
    #     models_all.extend(cross_validate(model_class,
    #                                      data,
    #                                      embeddings,
    #                                      model_config=model.get('args')))

    model = BiLSTMCharCNNModel(data=data, corpus_info=corpus_info,
                               text_mapper=text_mapper, batch_size=128)
    model.blend_embeddings(embeddings)
    model.define_model()
    model.fit()
    # cleanup_models([model])  # embedding/memory cleanup

    val_preds = model.predict_subset(subset='val')

    # ensemble_cv = Ensemble(models_all)
    # train_X = [data.train_X]
    # val_X = [data.val_X]
    # test_X = [data.test_X]
    # if data.custom_features:
    #     train_X += [data.train_features]
    #     val_X += [data.val_features]
    #     test_X += [data.test_features]

    # find the best threshold
    # pred_train_y = ensemble_cv.predict_linear_regression(train_X, data.train_y, train_X)
    val_y = np.array(data.val_labels)
    thresh = find_best_threshold(val_preds, val_y)
    # pred_val_y = ensemble_cv.predict_linear_regression(train_X, data.train_y, val_X)
    print_diagnostics(val_y, (val_preds > thresh).astype(int))
    # pred_y_test = ensemble_cv.predict_linear_regression(train_X, data.train_y, test_X)
    pred_y_test = model.predict_subset('test')
    write_predictions(data, pred_y_test, thresh)
def backward():
    yolo = YOLO(config.class_num, config.anchors,
                width=config.width, height=config.height)
    data = Data(config.train_file, config.class_num, config.batch_size,
                config.anchors, config.data_augment,
                width=config.width, height=config.height,
                data_debug=config.data_debug)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32,
                                      shape=[config.batch_size, None, None, 3])
    y1_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])
    y2_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])
    y3_true = tf.compat.v1.placeholder(
        dtype=tf.float32,
        shape=[config.batch_size, None, None, 3, 4 + 1 + config.class_num])

    feature_y1, feature_y2, feature_y3 = yolo.forward(
        inputs, weight_decay=config.weight_decay, isTrain=True)
    global_step = tf.Variable(0, trainable=False)

    # yolov4 loss
    loss = yolo.get_loss_v4(feature_y1, feature_y2, feature_y3,
                            y1_true, y2_true, y3_true,
                            config.cls_normalizer, config.ignore_thresh,
                            config.prob_thresh, config.score_thresh)
    l2_loss = tf.compat.v1.losses.get_regularization_loss()

    epoch = compute_curr_epoch(global_step, config.batch_size,
                               len(data.imgs_path))
    lr = config_lr(config.lr_type, config.lr_init, epoch)
    optimizer = config_optimizer(config.optimizer_type, lr, config.momentum)

    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gvs = optimizer.compute_gradients(loss + l2_loss)
        clip_grad_var = [
            gv if gv[0] is None else [tf.clip_by_norm(gv[0], 100.), gv[1]]
            for gv in gvs
        ]
        train_step = optimizer.apply_gradients(clip_grad_var,
                                               global_step=global_step)

    # initialization
    init = tf.compat.v1.global_variables_initializer()
    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session() as sess:
        sess.run(init)
        step = 0
        ckpt = tf.compat.v1.train.get_checkpoint_state(config.model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            step = int(step)
            Log.add_log("message: found ckpt model, global_step=" + str(step))
        else:
            Log.add_log("message: no ckpt model found")

        # total number of training steps
        total_steps = np.ceil(config.total_epoch * len(data.imgs_path) /
                              config.batch_size)
        while step < total_steps:
            start = time.perf_counter()
            batch_img, y1, y2, y3 = next(data)
            _, loss_, step, lr_ = sess.run([train_step, loss, global_step, lr],
                                           feed_dict={
                                               inputs: batch_img,
                                               y1_true: y1,
                                               y2_true: y2,
                                               y3_true: y3
                                           })
            end = time.perf_counter()
            print(
                "step: %6d, loss: %.5g\t, w: %3d, h: %3d, lr:%.5g\t, time: %5f s"
                % (step, loss_, data.width, data.height, lr_, end - start))

            if (loss_ > 1e3) and (step > 1e3):
                Log.add_log("error: loss exception, loss_value = " + str(loss_))
                ''' break the process or lower learning rate '''
                raise ValueError("error: loss exception, loss_value = " +
                                 str(loss_) +
                                 ", please lower your learning rate")
                # lr = tf.math.maximum(tf.math.divide(lr, 10), config.lr_lower)

            if step % 5 == 2:
                Log.add_loss(str(step) + "\t" + str(loss_))

            if (step + 1) % config.save_step == 0:
                # save ckpt model
                if config.save_ckpt_model:
                    Log.add_log("message: save ckpt model, step=" + str(step) +
                                ", lr=" + str(lr_))
                    saver.save(sess,
                               path.join(config.model_path, config.model_name),
                               global_step=step)
                if config.save_pb_model:
                    Log.add_log("message: save pb model, step=" + str(step))
                    pb_model_name = path.join(
                        config.model_path,
                        config.model_name) + '-' + str(step) + ".pb"
                    constant_graph = graph_util.convert_variables_to_constants(
                        sess, sess.graph_def, [
                            'yolo/Conv_1/BiasAdd', 'yolo/Conv_9/BiasAdd',
                            'yolo/Conv_17/BiasAdd'
                        ])
                    # save PB model
                    with tf.gfile.FastGFile(pb_model_name, mode='wb') as f:
                        f.write(constant_graph.SerializeToString())

        # save ckpt model
        if config.save_ckpt_model:
            Log.add_log("message: save final ckpt model, step=" + str(step))
            saver.save(sess, path.join(config.model_path, config.model_name),
                       global_step=step)
        # save pb model
        if config.save_pb_model:
            Log.add_log("message: save final pb model, step=" + str(step))
            pb_model_name = path.join(
                config.model_path,
                config.model_name) + '-' + str(step) + ".pb"
            constant_graph = graph_util.convert_variables_to_constants(
                sess, sess.graph_def, [
                    'yolo/Conv_1/BiasAdd', 'yolo/Conv_9/BiasAdd',
                    'yolo/Conv_17/BiasAdd'
                ])
            # save PB model
            with tf.gfile.FastGFile(pb_model_name, mode='wb') as f:
                f.write(constant_graph.SerializeToString())
    return 0