def create_sample_dropout(sample_id, num_vars: int, bicluster_list: List[SimBicluster], epi_data: np.ndarray,
                          num_clusters: int, epi_error: float, prob_drop_out: float):
    '''
    Simulate one sample's measurement vector, with random drop-out applied.

    :param sample_id: identifier added to the ``samples`` set of every chosen bicluster
    :param num_vars: number of variables (length of the returned vector)
    :param bicluster_list: biclusters to draw from; each must expose ``vars`` and ``samples``
    :param epi_data: vector of length ``num_vars``; updated IN PLACE by adding this sample's 0/1 row
    :param num_clusters: how many biclusters the sample belongs to (drawn without replacement)
    :param epi_error: probability that an entry of the 0/1 row is flipped
    :param prob_drop_out: probability that a measurement is lost and reported as 0
    :return: list of ``num_vars`` measurements (dropped entries are the int ``0``)
    :raises ValueError: if ``num_clusters`` exceeds ``len(bicluster_list)`` (from ``random.sample``)
    '''
    chosen_clusters = random.sample(bicluster_list, num_clusters)

    # Union of the variables of all chosen biclusters; each chosen bicluster
    # also records that this sample belongs to it.
    chosen_vars = set()
    for cluster in chosen_clusters:
        chosen_vars.update(cluster.vars)
        cluster.samples.add(sample_id)

    # Variables inside a chosen bicluster are drawn around 10, the rest around 0.
    sample_clean = [
        random.normalvariate(10, 1) if i in chosen_vars else random.normalvariate(0, 1)
        for i in range(num_vars)
    ]

    # A value is dropped (set to 0) when the uniform draw falls below
    # prob_drop_out, so prob_drop_out=0 keeps everything and prob_drop_out=1
    # drops everything. One uniform draw is consumed per variable.
    sample = [
        0 if random.random() < prob_drop_out else sample_var
        for sample_var in sample_clean
    ]

    # Noisy 0/1 membership row: a chosen variable is 1 with probability
    # 1 - epi_error, any other variable is 1 with probability epi_error.
    epi_row = [
        np.random.binomial(1, 1 - epi_error) if i in chosen_vars else np.random.binomial(1, epi_error)
        for i in range(num_vars)
    ]
    # In-place accumulation: the caller's array is modified.
    epi_data += epi_row
    return sample
def create_sample_easy(sample_id, num_vars: int, bicluster_list: List[SimBicluster], epi_data: np.ndarray,
                       num_tfs: int, epi_error: float, error_type: str):
    '''
    Simulate the gene-measurement vector of one sample.

    :param sample_id: unique id of the sample; added to each chosen bicluster's ``samples``
    :param num_vars: number of genes
    :param bicluster_list: list of bicluster objects to draw from
    :param epi_data: epigenetic gene vector; the sample's 0/1 membership row is added to it in place
    :param num_tfs: number of biclusters the sample belongs to
    :param epi_error: with ``error_type == "mode"`` the probability that a gene's on/off state
        is flipped; otherwise the standard deviation of the measurement noise
    :param error_type: ``"mode"`` selects state-flip noise, any other value selects
        noise whose spread is ``epi_error``
    :return: sample vector of gene measurements (list of length ``num_vars``)
    '''
    # Draw the biclusters this sample is a member of (without replacement).
    members = random.sample(bicluster_list, num_tfs)

    # Collect every gene activated by at least one of them and register the
    # sample with each bicluster.
    active_genes = set()
    for bicluster in members:
        active_genes.update(bicluster.vars)
        bicluster.samples.add(sample_id)

    if error_type == "mode":
        # Each gene first gets a (possibly flipped) on/off state, then a
        # unit-variance measurement centred on 3 (on) or 0 (off).
        sample = []
        for gene in range(num_vars):
            p_on = 1 - epi_error if gene in active_genes else epi_error
            is_on = np.random.binomial(1, p_on) == 1
            sample.append(random.normalvariate(3 if is_on else 0, 1))
    else:
        # States are exact; epi_error is the spread of the measurement.
        sample = [
            random.normalvariate(3 if gene in active_genes else 0, epi_error)
            for gene in range(num_vars)
        ]

    # Noise-free membership indicator, accumulated into the caller's vector.
    membership = [float(gene in active_genes) for gene in range(num_vars)]
    epi_data += membership
    return sample