Example #1
    def load_datasets(self,
                      target,
                      data_dir,
                      batch_dir,
                      batch_image_size,
                      stride_size=0):

        print("Loading datasets for [%s]..." % target)
        util.make_dir(batch_dir)

        if stride_size == 0:
            stride_size = batch_image_size // 2

        if self.bicubic_init:
            resampling_method = "bicubic"
        else:
            resampling_method = "nearest"

        datasets = util.DataSets(self.scale,
                                 batch_image_size,
                                 stride_size,
                                 channels=self.channels,
                                 jpeg_mode=self.jpeg_mode,
                                 max_value=self.max_value,
                                 resampling_method=resampling_method)

        if not datasets.is_batch_exist(batch_dir):
            datasets.build_batch(data_dir, batch_dir)

        if target == "training":
            datasets.load_batch_train(batch_dir)
            self.train = datasets
        else:
            datasets.load_batch_test(batch_dir)
            self.test = datasets
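A hedged usage sketch, not taken from the source: load_datasets appears to be an instance method of the super-resolution model whose __init__ is shown in Example #17, so it would be called on a constructed model object; the directory names below are hypothetical.

# Hypothetical call, assuming `model` is a constructed model instance (see Example #17)
model.load_datasets("training",
                    data_dir="data/train",         # raw training images (assumed layout)
                    batch_dir="batch_data/train",  # cache of pre-cut batches, built if missing
                    batch_image_size=32)           # stride_size defaults to batch_image_size // 2
model.load_datasets("test",
                    data_dir="data/test",
                    batch_dir="batch_data/test",
                    batch_image_size=32)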
Example #2
def main():
    if len(sys.argv) != 7:
        usage()
    pred_dir = sys.argv[1]
    helper.check_dir_exist(pred_dir)
    true_segment_dir = sys.argv[2]
    helper.check_dir_exist(true_segment_dir)
    ct = sys.argv[3]
    outDir = sys.argv[4]
    helper.make_dir(outDir)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[5])
    num_score_bins = helper.get_command_line_integer(sys.argv[6])
    print "Done getting command line arguments"
    # first get the upper bounds for the score bins
    (reverse_lower_bound_list,
     upper_bound_score_list) = get_score_bins(num_score_bins)
    print "Get the bounds of posterior probabilities that we will set for each of the bin"
    # get the count of true positives and false positives, etc. across all regions in the genome
    total_tp_fp_df = get_tp_fp_data_all_regions(true_segment_dir, pred_dir,
                                                reverse_lower_bound_list, ct,
                                                num_chromHMM_state)
    print "Done processing all the files corresponding to all the regions in the genome"
    # calculate tpr and fpr values for each of the state
    save_fn = os.path.join(outDir, 'tpr_fpr_all_states.txt.gz')
    calculate_tpr_fpr(total_tp_fp_df, num_chromHMM_state, save_fn)
    print "Done calculating true positive rates and false positive rates in all bins"
def call_cross_validation_functions(validate_ct, ct_list, outDir,
                                    train_sampled_data_fn,
                                    all_ct_posterior_folder,
                                    num_chromHMM_state):
    code_file_fn = '/u/home/h/havu73/project-ernst/source_pete/train_and_evaluate/posterior_based/train_predict_chromHMM_posterior.py'
    val_outDir = os.path.join(outDir, 'val_' + validate_ct)
    print "Running validation of ct: " + validate_ct
    for train_ct_i, train_ct in enumerate(
            ct_list
    ):  # this ct will be used as the response variable for training the data.
        predictor_ct_list = ct_list[:train_ct_i] + ct_list[(
            train_ct_i + 1
        ):]  # leave out the response ct. All the remaining ones will be used as predictor and will be passed into the program
        num_predictor_ct = len(predictor_ct_list)
        this_predict_outDir = os.path.join(val_outDir, 'pred_' + train_ct)
        helper.make_dir(this_predict_outDir)
        command = [
            'python', code_file_fn, train_sampled_data_fn,
            all_ct_posterior_folder, this_predict_outDir, train_ct,
            str(num_chromHMM_state),
            str(num_predictor_ct)
        ] + predictor_ct_list
        print "Within, running predicting cell type: " + train_ct
        # call(command)
    print "Averaging results from different predictions for this validation"
    averaging_predictions_to_validate_one_ct(validate_ct_dir=val_outDir,
                                             validate_ct=validate_ct,
                                             num_pred_ct=len(ct_list))
    print ""
    print ""
def main():
    if len(sys.argv) != 3:
        usage()
    chrom_dir = sys.argv[1]
    helper.check_dir_exist(chrom_dir)
    out_dir = sys.argv[2]
    helper.make_dir(out_dir)
    print "Done getting command line arguments"
Example #5
def main():
    if len(sys.argv) != 4:
        usage()
    avg_state_dir = sys.argv[1]
    helper.check_dir_exist(avg_state_dir)
    out_dir = sys.argv[2]
    helper.make_dir(out_dir)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[3])
    print "Done getting command line arguments"
    calculate_hist_parallel(avg_state_dir, out_dir, num_chromHMM_state)
Example #6
def main():
    if len(sys.argv) != 4:
        usage()
    all_ct_hist_dir = sys.argv[1]
    helper.check_dir_exist(all_ct_hist_dir)
    out_dir = sys.argv[2]
    helper.make_dir(out_dir)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[3])
    print "Done getting command line arguments"
    average_histogram_across_all_ct(all_ct_hist_dir, out_dir,
                                    num_chromHMM_state)
    print "Done!"
Example #7
def main():
	if len(sys.argv) != 7:
		usage()
	ct_pos_dir = sys.argv[1]
	helper.check_dir_exist(ct_pos_dir)
	out_dir = sys.argv[2]
	helper.make_dir(out_dir)
	num_chromHMM_state = helper.get_command_line_integer(sys.argv[3])
	ct_name = sys.argv[4]
	prefix_pos_fn = sys.argv[5]
	suffix_pos_fn = sys.argv[6]
	print "Done getting command line arguments"
	calculate_hist_parallel(ct_pos_dir, out_dir, num_chromHMM_state, ct_name, prefix_pos_fn, suffix_pos_fn)
def main():
	if len(sys.argv) != 6:
		usage()
	cg_dir = sys.argv[1]
	helper.check_dir_exist(cg_dir)
	out_dir = sys.argv[2]
	helper.make_dir(out_dir)
	num_chromHMM_model = helper.get_command_line_integer(sys.argv[3])
	num_score_bins = helper.get_command_line_integer(sys.argv[4])
	cell_type_list_fn = sys.argv[5]
	helper.check_file_exist(cell_type_list_fn)
	ct_list = helper.get_list_from_line_seperated_file(cell_type_list_fn)
	print "Done getting command line arguments"
	calculate_summary_staistics_across_ct(cg_dir, out_dir, num_chromHMM_model, num_score_bins, ct_list)
	print "Done!"
def main():
    num_mandatory_args = 8
    if len(sys.argv) < num_mandatory_args:
        usage()
    train_segment_fn = sys.argv[1]
    helper.check_file_exist(train_segment_fn)
    all_ct_segment_folder = sys.argv[
        2]  # where the segmentation data of all cell types are combined, and stored in files corresponding to different regions in the genome.
    if not os.path.isdir(all_ct_segment_folder):
        print "all_ct_segment_folder IS NOT VALID: " + all_ct_segment_folder
        usage()
    predict_outDir = sys.argv[3]
    helper.make_dir(predict_outDir)
    response_ct = sys.argv[4]
    try:
        num_chromHMM_state = int(sys.argv[5])
        assert num_chromHMM_state > 0, "num_chromHMM_state needs to be positive"
        num_train_ct = int(sys.argv[6])
        assert num_train_ct > 0, "num_train_ct needs to be positive"
    except:
        print "num_chromHMM_state or num_train_ct is not valid"
        usage()
    train_mode = sys.argv[7]
    if len(sys.argv) != (num_train_ct + num_mandatory_args):
        print "num_train_ct is different from the number of arguments passed into the program"
        usage()
    print "Done getting command line arguments"
    train_cell_types = sys.argv[
        num_mandatory_args:]  # the rest of the arguments are the cell types that we use to train the model
    # 1. Get the data of predictors and response for training
    Xtrain_segment_df, Y_df = get_XY_segmentation_data(train_cell_types,
                                                       response_ct,
                                                       num_chromHMM_state,
                                                       train_segment_fn,
                                                       train_mode)
    print "Done getting one hot data"
    print Xtrain_segment_df.head()
    print
    print Y_df.head()
    # 2. Get the regression machine
    regression_machine = train_model(Xtrain_segment_df, Y_df,
                                     num_chromHMM_state, train_mode)
    print "Done training"
    # 3. Based on the machine just created, process training data and then predict the segmentation at each position for the response_ct
    predict_segmentation(all_ct_segment_folder, regression_machine,
                         predict_outDir, train_cell_types, response_ct,
                         num_chromHMM_state, train_mode)
    print "Done predicting whole genome"
def main():
	if len(sys.argv) != 3:
		usage()
	org_ct_segment_folder = sys.argv[1]
	if not os.path.isdir(org_ct_segment_folder): 
		print "org_ct_segment_folder IS NOT VALID: " + org_ct_segment_folder
		usage()
	output_folder = sys.argv[2]
	helper.make_dir(output_folder)
	print "Done getting command line arguments"
	ct_list, ct_df_list = get_ct_segment_df (org_ct_segment_folder)
	print "Done getting segment_df for all cell types"
	chrom_len_dict = get_chromosome_length(org_ct_segment_folder, ct_list)
	print "Done getting chromosome length"
	combine_segment_in_parallel(ct_list, ct_df_list, chrom_len_dict, output_folder)
	print ""
	print ""
	print "Done!"
def main():
    num_mandatory_args = 7
    if len(sys.argv) < num_mandatory_args:
        usage()
    train_segment_fn = sys.argv[1]
    helper.check_file_exist(train_segment_fn)
    all_ct_posterior_folder = sys.argv[
        2]  # where the posterior probability data of all cell types are combined, and stored in files corresponding to different regions in the genome.
    helper.check_dir_exist(all_ct_posterior_folder)
    predict_outDir = sys.argv[3]
    helper.make_dir(predict_outDir)
    response_ct = sys.argv[4]
    try:
        num_chromHMM_state = int(sys.argv[5])
        assert num_chromHMM_state > 0, "num_chromHMM_state needs to be positive"
        num_train_ct = int(sys.argv[6])
        assert num_train_ct > 0, "num_train_ct needs to be positive"
    except:
        print "num_chromHMM_state or num_train_ct is not valid"
        usage()
    if len(sys.argv) != (num_train_ct + num_mandatory_args):
        print "num_train_ct is different from the number of arguments passed into the program"
        usage()
    print "Done getting command line arguments"
    train_cell_types = sys.argv[
        num_mandatory_args:]  # the rest of the arguments are the cell types that we use to train the model
    # 1. Get the data of predictors and response for training
    Xtrain_segment_df, Y_df = get_XY_segmentation_data(
        train_cell_types, response_ct, num_chromHMM_state, train_segment_fn
    )  # Xtrain_segment_df: example colnames: 'E047_S16', 'E047_S17' --> posterior probabilities of each of the state in each cell type that are used to train
    # Y_df --> example colnames 'E047' --> state numbers 1 --> 18 of each position used to train data for the response cell type
    print "Done getting one hot data"
    print Xtrain_segment_df.head()
    print
    print Y_df.head()
    # 2. Get the regression machine
    regression_machine = train_multinomial_logistic_regression(
        Xtrain_segment_df, Y_df, num_chromHMM_state)
    print "Done training"
    # 3. Based on the machine just created, process training data and then predict the segmentation at each position for the response_ct
    predict_segmentation(all_ct_posterior_folder, regression_machine,
                         predict_outDir, train_cell_types, response_ct,
                         num_chromHMM_state)
    print "Done predicting whole genome"
def main():
    if len(sys.argv) != 7:
        usage()
    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_posterior_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_posterior_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    all_ct_list_fn = sys.argv[6]
    print "Done getting command line arguments"
    # get all cell types
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)
    print ct_list
    # call all cell types
    call_cross_validation_functions(validate_ct, ct_list, outDir,
                                    train_sampled_data_fn,
                                    all_ct_posterior_folder,
                                    num_chromHMM_state)
Example #13
def histogram_movie(data_loc, resolution, plot_loc):
    """
	A function that returns a directory of images
	depicting the probability density (histogram)
	of the positions for each time step that can be
	made into a movie.
		data_loc : directory where the simulated data is located
		resolution : number of bins for the histogram
		plot_loc : directory where the plots will be placed
	"""

    # making a list of all the files
    file_list = glob.glob(f"{data_loc}/experiment*")
    # importing all of the data from the experiments
    print("Importing data...")
    all_data = np.array([np.load(file) for file in file_list])
    # extracting time series (assumes common time scaling across exps)
    ts = all_data[0][0]
    # extracting all position data
    pos_data = np.array([all_data[i][1] for i in range(len(all_data))])

    print("Producing plots...")
    # making histogram plots

    # creating a folder to save the plots
    make_dir(plot_loc)

    for i in range(len(ts)):
        print(f"\r{i}/{len(ts)}", end="")
        plt.clf()  # clear figure
        plt.xlim(-1.5, 1.5)  # setting common x axis
        # we are taking the histogram across experiments
        # for each timestep, hence the transposing
        plt.hist(pos_data.T[i], bins=resolution,
                 range=(-1.0, 1.0))  # plotting histogram
        plt.title(f"Time : {ts[i]} units")  # keeping track of time
        plt.savefig(f"./{plot_loc}/step-{i:05n}.png")
    print("\nPlot production complete!")
Example #14
def main():
    if len(sys.argv) != 8:
        usage()
    train_sampled_data_fn = sys.argv[1]
    helper.check_file_exist(train_sampled_data_fn)
    outDir = sys.argv[2]
    helper.make_dir(outDir)
    all_ct_segment_folder = sys.argv[3]
    helper.check_dir_exist(all_ct_segment_folder)
    num_chromHMM_state = helper.get_command_line_integer(sys.argv[4])
    validate_ct = sys.argv[5]
    train_mode = sys.argv[6]
    all_ct_list_fn = sys.argv[7]
    print "Done getting command line arguments"
    # get the list of all genomic positions used to segment the genome for our model training (we exclude chromosome Y in all analysis)
    gen_pos_list = get_genomic_positions_list(all_ct_segment_folder)
    # get all cell types
    ct_list = get_all_train_ct_list(all_ct_list_fn, validate_ct)
    # call all cell types
    call_cross_validation_functions(validate_ct, ct_list, outDir,
                                    train_sampled_data_fn,
                                    all_ct_segment_folder, num_chromHMM_state,
                                    gen_pos_list, train_mode)
Example #15
def averaging_predictions_to_validate_one_ct(validate_ct_dir, validate_ct,
                                             num_pred_ct, gen_pos_list):
    # num_pred_ct: number of ct whose predictions we use to average out and get the predictions for the validate cell type
    # validate_ct_dir: the directory containing the data associated with the cell type used for validation
    pred_dir_list = glob.glob(validate_ct_dir + "/pred_*")
    pred_ct_list = map(lambda x: (x.split('/')[-1]).split('_')[-1],
                       pred_dir_list)  # path/to/pred_E034 --> E034
    assert len(
        pred_dir_list
    ) == num_pred_ct, 'Number of pred_dir_list is not the same as number of specified ct used to predict the model'
    # get the folder where the results of averaging across different prediction cell types will be stored
    validate_outDir = os.path.join(validate_ct_dir, 'average_predictions')
    helper.make_dir(validate_outDir)
    for gene_window in gen_pos_list:  # loop through each genomic window and then get the average result across different predictions for all positions in this window
        this_window_output_fn = os.path.join(validate_outDir,
                                             gene_window + "_avg_pred.txt.gz")
        this_window_pred_fn_list = map(
            lambda x: os.path.join(x, gene_window + "_pred_out.txt.gz"),
            pred_dir_list)
        # calculate the average prediction results across different prediction cell types for this window, and save the results
        average_multiple_result_files(this_window_pred_fn_list,
                                      this_window_output_fn)
    return
def averaging_predictions_to_validate_one_ct(validate_ct_dir, validate_ct,
                                             num_pred_ct):
    # num_pred_ct: number of ct whose predictions we use to average out and get the predictions for the validate cell type
    # validate_ct_dir: the directory containing the data associated with the cell type used for validation
    pred_dir_list = glob.glob(validate_ct_dir + "/pred_*")
    pred_ct_list = map(lambda x: (x.split('/')[-1]).split('_')[-1],
                       pred_dir_list)  # path/to/pred_E034 --> E034
    assert len(
        pred_dir_list
    ) == num_pred_ct, 'Number of pred_dir_list is not the same as number of specified ct used to predict the model'
    # get the folder where the results of averaging across different prediction cell types will be stored
    validate_outDir = os.path.join(validate_ct_dir, 'average_predictions')
    helper.make_dir(validate_outDir)
    for chrom_index in helper.CHROMOSOME_LIST:  # loop through each chromosome and then get the average result across different predictions for all positions in this chromosome
        this_window_pred_fn_list = map(
            lambda x: os.path.join(
                x, 'chr' + str(chrom_index) + "_pred_out.txt.gz"),
            pred_dir_list)
        # calculate the average prediction results across different prediction cell types for this window, and save the results
        average_multiple_result_files(
            this_window_pred_fn_list, chrom_index, validate_outDir
        )  # this function will take the average of predictions of multiple cell types, for each genomic bin(200bp), and then divide the averaged data for each chromosome into
        print "Done averaging results for chromosome: " + str(chrom_index)
    return
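average_multiple_result_files is called in both variants above but not shown; here is a minimal sketch of the first variant's behaviour, assuming each prediction file is a gzipped tab-separated table of per-state posterior probabilities with identical shape and row order across cell types.

import pandas as pd

def average_multiple_result_files(pred_fn_list, output_fn):
    # Hypothetical sketch: element-wise average of the prediction tables from multiple cell types.
    dfs = [pd.read_csv(fn, sep='\t', header=0, compression='gzip') for fn in pred_fn_list]
    avg_df = sum(dfs) / float(len(dfs))  # relies on identical shapes and row order
    avg_df.to_csv(output_fn, sep='\t', header=True, index=False, compression='gzip')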
Example #17
    def __init__(self, flags, model_name=""):

        # Model Parameters
        self.filters = flags.filters
        self.min_filters = flags.min_filters
        self.nin_filters = flags.nin_filters
        self.nin_filters2 = flags.nin_filters2 if flags.nin_filters2 != 0 else flags.nin_filters // 2
        self.cnn_size = flags.cnn_size
        self.last_cnn_size = flags.last_cnn_size
        self.cnn_stride = 1
        self.layers = flags.layers
        self.nin = flags.nin
        self.bicubic_init = flags.bicubic_init
        self.dropout = flags.dropout
        self.activator = flags.activator
        self.filters_decay_gamma = flags.filters_decay_gamma

        # Training Parameters
        self.initializer = flags.initializer
        self.weight_dev = flags.weight_dev
        self.l2_decay = flags.l2_decay
        self.optimizer = flags.optimizer
        self.beta1 = flags.beta1
        self.beta2 = flags.beta2
        self.momentum = flags.momentum
        self.batch_num = flags.batch_num
        self.batch_image_size = flags.batch_image_size
        if flags.stride_size == 0:
            self.stride_size = flags.batch_image_size // 2
        else:
            self.stride_size = flags.stride_size

        # Learning Rate Control for Training
        self.initial_lr = flags.initial_lr
        self.lr_decay = flags.lr_decay
        self.lr_decay_epoch = flags.lr_decay_epoch

        # Dataset or Others
        self.dataset = flags.dataset
        self.test_dataset = flags.test_dataset

        # Image Processing Parameters
        self.scale = flags.scale
        self.max_value = flags.max_value
        self.channels = flags.channels
        self.jpeg_mode = flags.jpeg_mode
        self.output_channels = self.scale * self.scale

        # Environment (directory names should not contain a trailing '/')
        self.checkpoint_dir = flags.checkpoint_dir
        self.tf_log_dir = flags.tf_log_dir

        # Debugging or Logging
        self.debug = flags.debug
        self.save_loss = flags.save_loss
        self.save_weights = flags.save_weights
        self.save_images = flags.save_images
        self.save_images_num = flags.save_images_num
        self.log_weight_image_num = 32

        # initialize variables
        self.name = self.get_model_name(model_name)
        self.batch_input = self.batch_num * [None]
        self.batch_input_quad = np.zeros(shape=[
            self.batch_num, self.batch_image_size, self.batch_image_size,
            self.scale * self.scale
        ])
        self.batch_true_quad = np.zeros(shape=[
            self.batch_num, self.batch_image_size, self.batch_image_size,
            self.scale * self.scale
        ])
        self.receptive_fields = 2 * self.layers + self.cnn_size - 2
        self.complexity = 0

        # initialize environment
        util.make_dir(self.checkpoint_dir)
        util.make_dir(flags.graph_dir)
        util.make_dir(self.tf_log_dir)
        if flags.initialise_tf_log:
            util.clean_dir(self.tf_log_dir)
        util.set_logging(flags.log_filename,
                         stream_log_level=logging.INFO,
                         file_log_level=logging.INFO,
                         tf_log_level=tf.logging.WARN)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.InteractiveSession(config=config)
        self.init_train_step()

        logging.info("\nDCSCN -------------------------------------")
        logging.info("%s [%s]" % (util.get_now_date(), self.name))
Example #18
def main(net_config, ckpt_for_init):
    
    ## load the config
    config = configs.Config(net_config)

    ## set the logger
    test_dir = os.path.join(config.log_dir, "test")
    log_dir = helper.make_dir([test_dir], re_create_dir = True)
    log_file = os.path.join(log_dir, config.net_config + '_test.txt')
    csv_file = os.path.join(log_dir, config.net_config + '_test.csv')
    logger = helper.Logger(log_file)
    logger.add(config.config_str, do_print=True)

    ## load the datasets from the csv file (train, val, feat_len)
    data = input_data.load_datasets(config.input_csv) # data has train.next_batch(xx) test.images. test.labels
    feat_len = data.feat_len

    ## set the input placeholders
    layer = 'input'
    with tf.name_scope(layer) as scope:
        x = tf.placeholder(tf.float32, [None, feat_len], name='input')
        y = tf.placeholder(tf.float32, [None, 1], name = 'output')
        keep_prob = tf.constant(1.0, name = 'keep_prob')

    ## call inference and compute the output
    y_ = deepnets.inference(config, input_tensors = {"x": x, "keep_prob": keep_prob})

    ## set the global step
    global_step = tf_utils.get_global_step()

    ## tensors to compute the validation loss
    with tf.name_scope('validation') as scope:
        val_loss = loss.compute_loss(est=y_, gt=y, loss_func= config.test_loss)
        val_summary =  tf.summary.scalar('val_loss', val_loss)

    init_op = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init_op)

    ## saving and restoring operations
    restore_variables = tf_utils.get_model_varaibles() +\
                        tf.get_collection("GLOBAL_STEP")+\
                        tf.get_collection('BN_VARIABLES')
    saver = tf.train.Saver(restore_variables)
    step_init = tf_utils.restore_model(config, sess, restore_variables, ckpt_for_init, logger)

    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

    # do the validation
    features = np.concatenate((data.train.features, data.val.features), axis=0)
    output= np.concatenate((data.train.output, data.val.output), axis=0)
    feed = {x:features, y: output}
    est, v_loss, v_summary = sess.run([y_, val_loss, val_summary], feed_dict=feed)

    # input_headers  = [x.encode('latin1') for x in data.input_header]
    headers = ','.join(data.input_header) + ", gt-y, est-y"

    vals = np.concatenate((features, output, est), axis=1)
    
    # append dataset default mu and sigma for estimated values
    mu = np.append(data.mu, data.mu[-1])
    sigma = np.append(data.sigma, data.sigma[-1])

    # reverse the standardization operation to the vals
    vals = np.add(vals * sigma, mu)


    np.savetxt(csv_file, vals, header= headers, delimiter=",")
    summary_writer.add_summary(v_summary, step_init)
    logger.add('val_loss {:f}'.format(v_loss), do_print=True)
    logger.save()
Example #19
def signal_ensemble(data_loc, resolution, plot_loc):
    """
	A function that returns heatmap of the position distributions
		data_loc : directory where the simulated data is located
		resolution : number of bins for the histogram
		plot_loc : directory where the plots will be placed
	"""
    # making a list of all the files
    file_list = glob.glob(f"{data_loc}/experiment*")
    # importing all of the data from the experiments
    print("Importing data...")
    all_data = np.array([np.load(file) for file in file_list])
    # extracting time series (assumes common time scaling across exps)
    ts = all_data[0][0]
    # extracting all position data
    pos_data = np.array([all_data[i][1] for i in range(len(all_data))])

    print("Producing plots...")
    # making histogram plots

    # creating a folder to save the plots
    make_dir(plot_loc)

    # range of histogram will be experiment agnostic and is determined from the data directly

    amplitude = max(abs(pos_data[0])) * 1.5  # for starters

    ensemble_histogram = np.array([
        np.histogram(pos_data.T[i],
                     bins=resolution,
                     range=(-amplitude, amplitude),
                     weights=np.ones_like(pos_data.T[i]) /
                     len(pos_data.T[i]))[0] for i in range(len(ts))
    ])

    # A 2D matrix where each row is a histogram of position for each timestep

    # displaying the plot

    # using figures and subplots
    fig, ax = plt.subplots(1)
    fig.set_figheight(6)
    fig.set_figwidth(12)
    hist = ax.pcolor(
        ensemble_histogram.T,
        cmap='inferno')  # such that x-axis is time and y-axis is position
    # adding colorbar
    fig.colorbar(hist, ax=ax)
    # relabeling axes

    # making the ticks correct
    # firstly, the xticks
    x_t_pos = np.linspace(0, len(ts), 10)  # we are sticking to just 10 ticks
    x_t_labels = [f"{t:.2f}" for t in ts[::len(ts) // 10]
                  ]  # choosing the right time values
    ax.set_xticks(x_t_pos)
    ax.set_xticklabels(x_t_labels, fontsize=12)
    # now, the yticks
    y_t_pos = np.linspace(0, resolution, 10)
    y_t_labels = [f"{x:.2E}" for x in np.linspace(-amplitude, amplitude, 10)]
    ax.set_yticks(y_t_pos)
    ax.set_yticklabels(y_t_labels, fontsize=12)
    ax.set_xlabel("Time [s]", fontsize=16)
    ax.set_ylabel("Position [m]", fontsize=16)
    ax.set_title("Probability distribution of nanosphere across time",
                 fontsize=18)

    plt.savefig(f"{plot_loc}/signal_histogram.png", bbox_inches='tight')
    plt.show()
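A hedged usage sketch mirroring Example #13 (paths and bin count are hypothetical).

# Hypothetical call: build a time-vs-position probability heatmap from the files in ./sim_data
signal_ensemble(data_loc="sim_data", resolution=100, plot_loc="heatmap_plots")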
Example #20
	PLOT_DIR = "plot"


	if timestep is None:
		timestep = 1e-3

	if label is None:
		# if a label is not provided, then use default naming conventions
		DATA_DIR += f"_n{NUM_TRIALS}_{timestamp}"
		PLOT_DIR += f"_n{NUM_TRIALS}_{timestamp}"
	else:
		# if a label is provided, then use label for directory naming
		DATA_DIR += f"_{label}"
		PLOT_DIR += f"_{label}"

	make_dir(DATA_DIR) # making the directory where the data will end up

	print("Running Simulations!")
	# first, running simulations
	print(f"Maximum simulation time\t:{max_time}\nDamping (gamma)\t:{gamma}\nTemperature (kB T)\t:{kBT}\nSaving frequency\t:{saving_freq} steps per save")
	# doing this multiple times so as to generate an average
	for trial_num in range(NUM_TRIALS):
		print(f"\r{trial_num}/{NUM_TRIALS}",end="")
		ts,xs,vs,ks,ps = simulation.trapSolver([simulation.var_stiffness,mass,max_time, gamma, kBT], timestep, saving_freq)
		# save this data
		simulation.save_data([ts,xs,vs,ks,ps],DIR_NAME=DATA_DIR,file_index=trial_num)
	
	print("\nSimulations are complete")

	# running statistics, particularly the histogram
	import statistics