def test_diff3():
    """Check `difference` on two dict-weighted signatures.

    TP53 appears in both signatures, so the result is expected to hold
    only SOX4, carrying the weight it had in the left-hand signature.
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    remainder = left.difference(right)

    assert 'SOX4' in remainder
    assert remainder.gene2weight['SOX4'] == 0.75
    assert len(remainder) == 1
def test_intersection3():
    """Check `intersection` on two dict-weighted signatures.

    Only TP53 is shared; the test expects it to keep the weight from the
    signature the method was called on (0.8), not the argument's 0.3.
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    shared = left.intersection(right)

    assert len(shared) == 1
    assert 'TP53' in shared
    assert shared.gene2weight['TP53'] == 0.8
def test_union1():
    """Check `union` on two signatures built from plain gene-name lists."""
    left = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])
    right = GeneSignature(name="test1", gene2weight=['TP53', 'SOX2'])

    merged = left.union(right)

    # Every gene from either side must be present, with TP53 counted once.
    for gene in ('TP53', 'SOX4', 'SOX2'):
        assert gene in merged
    assert len(merged) == 3
def test_union3():
    """Check `union` on two dict-weighted signatures.

    For the shared gene TP53 the test expects the larger of the two
    weights present in the inputs (0.8 from the left, 0.3 from the right).
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    merged = left.union(right)

    expected_weights = {'TP53': 0.8, 'SOX4': 0.75, 'SOX2': 0.6}
    for gene, weight in expected_weights.items():
        assert gene in merged
        assert merged.gene2weight[gene] == weight
    assert len(merged) == 3
def test_init1():
    """Construct a signature from a list of gene names.

    Genes given without explicit weights are expected to get weight 1.0.
    """
    gene_names = ('TP53', 'SOX4')
    signature = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])

    for gene in gene_names:
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene in gene_names:
        assert signature.gene2weight[gene] == 1.0
def test_init2():
    """Construct a signature from a list of (gene, weight) tuples."""
    expected_weights = {'TP53': 0.5, 'SOX4': 0.75}
    signature = GeneSignature(name="test1", gene2weight=[('TP53', 0.5), ('SOX4', 0.75)])

    for gene in expected_weights:
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene, weight in expected_weights.items():
        assert signature.gene2weight[gene] == weight
def test_init3():
    """Construct a signature from a gene -> weight dictionary."""
    expected_weights = {'TP53': 0.5, 'SOX4': 0.75}
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    for gene in expected_weights:
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene, weight in expected_weights.items():
        assert signature.gene2weight[gene] == weight
def test_head():
    """Check `head(n)` for n=1 and n=2 on a two-gene signature.

    With n=1 the test expects TP53 (weight 0.8) to be the single gene
    kept; with n=2 both genes must be present with their weights.
    """
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})

    top_one = signature.head(1)
    assert top_one['TP53'] == 0.8
    assert len(top_one) == 1

    top_two = signature.head(2)
    assert top_two['TP53'] == 0.8
    assert top_two['SOX4'] == 0.75
    assert len(top_two) == 2
def test_rename():
    """Check that `rename` yields a signature with the new name and the same genes/weights."""
    original = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    renamed = original.rename('test2')

    expected_weights = {'TP53': 0.5, 'SOX4': 0.75}
    for gene in expected_weights:
        assert gene in renamed
    assert renamed.name == 'test2'
    assert len(renamed) == 2
    for gene, weight in expected_weights.items():
        assert renamed.gene2weight[gene] == weight
def test_aucell_mismatch(exp_matrix, gs):
    """Smoke-test `aucell` with an extra signature made of FAKE0..FAKE99 genes.

    The synthetic genes are presumably absent from `exp_matrix` (hence
    "mismatch" in the test name) -- confirm against the fixture. The test
    makes no assertions; it only checks that the call completes and
    prints the head of the resulting matrix.

    :param exp_matrix: Expression-matrix fixture passed to `aucell`.
    :param gs: Fixture holding a list of signatures appended after the fake one.
    """
    thresholds = derive_auc_threshold(exp_matrix)

    fake_genes = ["FAKE{}".format(idx) for idx in range(100)]
    all_signatures = [GeneSignature(name="test", gene2weight=fake_genes)] + gs

    aucs_mtx = aucell(
        exp_matrix,
        all_signatures,
        auc_threshold=thresholds[0.01],
        num_workers=1,
    )
    print(aucs_mtx.head())
def test_noweights():
    """Check `noweights` on both a GeneSignature and a Regulon.

    The call must leave the source object untouched (weight stays 0.8),
    return an object whose weights are 1.0, and -- for a Regulon --
    preserve the Regulon type.
    """
    weighted_sig = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    flat_sig = weighted_sig.noweights()
    assert weighted_sig['TP53'] == 0.8
    assert flat_sig['TP53'] == 1.0

    weighted_reg = Regulon(
        name='TP53 regulon',
        gene2weight={'TP53': 0.8, 'SOX4': 0.75},
        transcription_factor="TP53",
        gene2occurrence={"TP53": 1},
    )
    flat_reg = weighted_reg.noweights()
    assert weighted_reg['TP53'] == 0.8
    assert flat_reg['TP53'] == 1.0
    assert isinstance(flat_reg, Regulon)
def calculate_regulon_enrichment(self):
    """Compute regulon enrichment per cell using AUCell.

    Each entry of ``self.regulons`` is converted into a ``GeneSignature``
    whose gene weights come from ``self.get_regulon_gene_data`` for the
    key ``self.auc_regulon_weights_key``; the signatures are then scored
    against ``self.ex_mtx`` with ``aucell``.

    :return: The AUCell result, re-indexed to the row order of
        ``self.ex_mtx``.
    """
    print("Using {} to weight the genes when running AUCell.".format(
        self.auc_regulon_weights_key))

    # One weighted signature per regulon, named after the regulon.
    regulon_signatures = [
        GeneSignature(
            name=regulon.name,
            gene2weight=self.get_regulon_gene_data(
                regulon, self.auc_regulon_weights_key),
        )
        for regulon in self.regulons
    ]

    # (n_cells x n_regulons)
    enrichment_mtx = aucell(
        self.ex_mtx, regulon_signatures, num_workers=self.num_workers)

    # Align the rows with the expression matrix's own index order.
    return enrichment_mtx.loc[self.ex_mtx.index]
def signatures():
    """Yield one ``GeneSignature`` per gene-signature .tsv file.

    Reads ``gene_sig_file_paths``, ``noweights``, ``weight_threshold`` and
    ``show_warnings`` from the enclosing scope (not visible in this block).

    Each file is parsed as a header-less, tab-separated table with the
    genes in the first column and optional weights in the second. The
    signature is named after the file's base name without extension; a
    file whose base name is ``regulons`` is skipped. When weights are used,
    only rows with weight strictly greater than ``weight_threshold`` are
    kept, and a file left empty by that filter is skipped (optionally with
    a warning).

    :raises Exception: If a file has 0 columns or more than 2 columns.
    """
    for gene_sig_file_path in gene_sig_file_paths:
        table = pd.read_csv(gene_sig_file_path, sep='\t', header=None, index_col=None)
        file_name = ntpath.basename(gene_sig_file_path)
        signature_name = os.path.splitext(file_name)[0]

        # The regulon frequency file is not a signature.
        if signature_name == 'regulons':
            continue

        # Sanity checks on the table layout.
        n_columns = len(table.columns)
        if n_columns == 0:
            raise Exception(
                f"{gene_sig_file_path} has 0 columns. Requires .tsv with 1 or 2 columns. First column should be genes (required), second (optional) are weight for the given genes."
            )
        if n_columns > 2:
            raise Exception(
                f"{gene_sig_file_path} has more than 2 columns. Requires .tsv with 1 or 2 columns. First column should be genes, second (optional) are weight for the given genes."
            )

        if n_columns == 1 or noweights:
            # Genes only: hand over the first column as-is.
            gene2weight = table[0]
        else:
            # Two columns (genes, weights): keep rows above the threshold.
            kept_rows = table[table[1] > weight_threshold]
            if len(kept_rows.index) == 0:
                if show_warnings:
                    warnings.warn(
                        "{0} is empty after apply filter with weight_threshold > {1}"
                        .format(signature_name, weight_threshold))
                continue
            gene2weight = [tuple(row) for row in kept_rows.values]

        yield GeneSignature(name=signature_name, gene2weight=gene2weight)
def doGeneSetEnrichment(self, request, context):
    """Run AUCell gene-set enrichment, yielding a progress state per step.

    The generator reads a gene set from the file named in the request,
    obtains a rankings matrix (building and saving it if it does not yet
    exist, loading it otherwise) and computes the AUC values for the
    gene set with ``enrichment``.

    :param request: Object providing ``geneSetFilePath`` (resolved against
        the gene-sets directory) and ``loomFilePath``.
    :param context: Unused in this function (presumably the RPC context --
        confirm against the service definition).
    :return: Generator of the values returned by ``gse.update_state``;
        the final one (step 4) carries the AUC values.
    """
    gene_set_file_path = os.path.join(self.dfh.get_gene_sets_dir(),
                                      request.geneSetFilePath)
    loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
    gse = _gse.GeneSetEnrichment(scope=self,
                                 method="AUCell",
                                 loom=loom,
                                 gene_set_file_path=gene_set_file_path,
                                 annotation='')

    # Running AUCell...
    yield gse.update_state(step=-1,
                           status_code=200,
                           status_message="Running AUCell...",
                           values=None)
    time.sleep(1)

    # Reading gene set...
    yield gse.update_state(step=0,
                           status_code=200,
                           status_message="Reading the gene set...",
                           values=None)
    with open(gse.gene_set_file_path, 'r') as f:
        # Skip first line because it contains the name of the signature
        gene_names = [line.strip() for idx, line in enumerate(f) if idx > 0]
    # Drop blank lines (e.g. a trailing newline at the end of the file) so
    # that no empty-string "gene" ends up in the signature.
    gs = GeneSignature(name='Gene Signature #1',
                       gene2weight=[gene for gene in gene_names if gene])
    time.sleep(1)

    if not gse.has_AUCell_rankings():
        # Creating the matrix as DataFrame...
        yield gse.update_state(step=1,
                               status_code=200,
                               status_message="Creating the matrix...",
                               values=None)
        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        # Transposed so that rows are indexed by CellID and columns by gene.
        dgem = np.transpose(loom.get_connection()[:, :])
        ex_mtx = pd.DataFrame(data=dgem,
                              index=loom.get_ca_attr_by_name("CellID"),
                              columns=loom.get_genes())

        # Creating the rankings...
        start_time = time.time()
        yield gse.update_state(step=2.1,
                               status_code=200,
                               status_message="Creating the rankings...",
                               values=None)
        rnk_mtx = create_rankings(ex_mtx=ex_mtx)

        # Saving the rankings...
        yield gse.update_state(step=2.2,
                               status_code=200,
                               status_message="Saving the rankings...",
                               values=None)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same ndarray and works on old and new pandas alike.
        lp.create(gse.get_AUCell_ranking_filepath(), rnk_mtx.values,
                  {"CellID": loom.get_cell_ids()},
                  {"Gene": loom.get_genes()})
        print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
    else:
        # Load the rankings...
        yield gse.update_state(
            step=2,
            status_code=200,
            status_message="Rankings exists: loading...",
            values=None)
        rnk_loom = self.lfh.get_loom_connection(
            gse.get_AUCell_ranking_filepath())
        rnk_mtx = pd.DataFrame(data=rnk_loom[:, :],
                               index=rnk_loom.ra.CellID,
                               columns=rnk_loom.ca.Gene)

    # Calculating AUCell enrichment...
    start_time = time.time()
    yield gse.update_state(
        step=3,
        status_code=200,
        status_message="Calculating AUCell enrichment...",
        values=None)
    aucs = enrichment(rnk_mtx, gs).loc[:, "AUC"].values
    print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
    yield gse.update_state(step=4,
                           status_code=200,
                           status_message=gse.get_method() +
                           " enrichment done!",
                           values=aucs)
def test_diff1():
    """Check `difference` on two signatures built from plain gene-name lists."""
    left = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])
    right = GeneSignature(name="test1", gene2weight=['TP53', 'SOX2'])

    remainder = left.difference(right)

    # TP53 is shared, so SOX4 is the only gene expected to remain.
    assert 'SOX4' in remainder
    assert len(remainder) == 1
def test_immut():
    """Check that a signature rejects mutation of its name and its weights.

    Assigning to ``name`` is expected to raise attrs' FrozenInstanceError,
    and item assignment on ``gene2weight`` is expected to raise TypeError.
    """
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    # Attribute assignment on the instance itself.
    with pytest.raises(attr.exceptions.FrozenInstanceError):
        signature.name = 'rename'

    # Item assignment on the weight mapping.
    with pytest.raises(TypeError):
        signature.gene2weight['TP53'] = 0.6
def test_genes():
    """Check the ``genes`` property of a two-gene signature.

    The expected tuple lists SOX4 before TP53, i.e. not in insertion
    order (presumably sorted by name -- confirm against the class).
    """
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})
    expected_genes = ('SOX4', 'TP53')
    assert signature.genes == expected_genes
def test_dict():
    """Check that indexing a signature by gene name returns that gene's weight."""
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})
    for gene, weight in (('TP53', 0.5), ('SOX4', 0.75)):
        assert signature[gene] == weight