예제 #1
0
def test_diff3():
    """Check ``difference`` on two weighted signatures.

    Only SOX4 is expected to remain, carrying the weight it had in the
    left-hand signature.
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    remainder = left.difference(right)

    assert 'SOX4' in remainder
    assert remainder.gene2weight['SOX4'] == 0.75
    assert len(remainder) == 1
예제 #2
0
def test_intersection3():
    """Check ``intersection`` on two weighted signatures.

    Only the shared gene TP53 is expected, with the weight taken from the
    left-hand signature (0.8 rather than 0.3).
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    shared = left.intersection(right)

    assert len(shared) == 1
    assert 'TP53' in shared
    assert shared.gene2weight['TP53'] == 0.8
예제 #3
0
def test_union1():
    """Check ``union`` on two signatures built from plain gene lists."""
    left = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])
    right = GeneSignature(name="test1", gene2weight=['TP53', 'SOX2'])

    merged = left.union(right)

    # Every gene from either side must be present, with no duplicates.
    for gene in ('TP53', 'SOX4', 'SOX2'):
        assert gene in merged
    assert len(merged) == 3
예제 #4
0
def test_union3():
    """Check ``union`` on two weighted signatures.

    For the shared gene TP53 the expected weight is 0.8, i.e. the larger of
    the two input weights (which is also the left-hand one).
    """
    left = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})
    right = GeneSignature(name="test1", gene2weight={'TP53': 0.3, 'SOX2': 0.60})

    merged = left.union(right)

    expected_weights = (('TP53', 0.8), ('SOX4', 0.75), ('SOX2', 0.6))
    for gene, weight in expected_weights:
        assert gene in merged
        assert merged.gene2weight[gene] == weight
    assert len(merged) == 3
예제 #5
0
def test_init1():
    """Check construction from a plain list of gene names.

    Each gene is expected to receive a weight of 1.0.
    """
    signature = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])

    for gene in ('TP53', 'SOX4'):
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene in ('TP53', 'SOX4'):
        assert signature.gene2weight[gene] == 1.0
예제 #6
0
def test_init2():
    """Check construction from a list of ``(gene, weight)`` tuples."""
    signature = GeneSignature(name="test1", gene2weight=[('TP53', 0.5), ('SOX4', 0.75)])

    for gene in ('TP53', 'SOX4'):
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene, weight in (('TP53', 0.5), ('SOX4', 0.75)):
        assert signature.gene2weight[gene] == weight
예제 #7
0
def test_init3():
    """Check construction from a gene-to-weight dictionary."""
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    for gene in ('TP53', 'SOX4'):
        assert gene in signature
    assert signature.name == 'test1'
    assert len(signature) == 2
    for gene, weight in (('TP53', 0.5), ('SOX4', 0.75)):
        assert signature.gene2weight[gene] == weight
예제 #8
0
def test_head():
    """Check ``head(n)`` for n=1 and n=2 on a two-gene weighted signature.

    With n=1 the result is expected to contain TP53, the gene with the
    larger weight.
    """
    full = GeneSignature(name="test1", gene2weight={'TP53': 0.8, 'SOX4': 0.75})

    top_one = full.head(1)
    assert top_one['TP53'] == 0.8
    assert len(top_one) == 1

    top_two = full.head(2)
    assert top_two['TP53'] == 0.8
    assert top_two['SOX4'] == 0.75
    assert len(top_two) == 2
예제 #9
0
def test_rename():
    """Check that ``rename`` returns a signature with the new name and the same genes/weights."""
    original = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    renamed = original.rename('test2')

    for gene in ('TP53', 'SOX4'):
        assert gene in renamed
    assert renamed.name == 'test2'
    assert len(renamed) == 2
    for gene, weight in (('TP53', 0.5), ('SOX4', 0.75)):
        assert renamed.gene2weight[gene] == weight
예제 #10
0
def test_aucell_mismatch(exp_matrix, gs):
    """Run ``aucell`` with a signature of 100 ``FAKE<i>`` genes prepended to ``gs``.

    The test makes no assertions; it only prints the head of the result, so
    it passes as long as nothing raises.
    NOTE(review): the FAKE genes are presumably absent from ``exp_matrix``
    (hence "mismatch") -- confirm against the fixture.
    """
    thresholds = derive_auc_threshold(exp_matrix)

    fake_genes = ["FAKE{}".format(i) for i in range(100)]
    fake_signature = GeneSignature(name="test", gene2weight=fake_genes)
    all_signatures = [fake_signature] + gs

    result = aucell(
        exp_matrix,
        all_signatures,
        auc_threshold=thresholds[0.01],
        num_workers=1,
    )
    print(result.head())
예제 #11
0
def test_noweights():
    """Check ``noweights`` on a ``GeneSignature`` and on a ``Regulon``.

    The source object must keep its weight while the returned copy reports
    1.0 for the same gene.
    """
    weights = {'TP53': 0.8, 'SOX4': 0.75}

    weighted_sig = GeneSignature(name="test1", gene2weight=weights)
    flat_sig = weighted_sig.noweights()
    assert weighted_sig['TP53'] == 0.8
    assert flat_sig['TP53'] == 1.0

    weighted_reg = Regulon(
        name='TP53 regulon',
        gene2weight={'TP53': 0.8, 'SOX4': 0.75},
        transcription_factor="TP53",
        gene2occurrence={"TP53": 1},
    )
    flat_reg = weighted_reg.noweights()
    assert weighted_reg['TP53'] == 0.8
    assert flat_reg['TP53'] == 1.0
    assert isinstance(reg2, Regulon)
예제 #12
0
 def calculate_regulon_enrichment(self):
     """Compute regulon enrichment per cell with AUCell.

     Each regulon in ``self.regulons`` is turned into a ``GeneSignature``
     whose gene weights come from ``self.get_regulon_gene_data`` using the
     key stored in ``self.auc_regulon_weights_key``.

     Returns the AUCell result re-indexed to the row order of
     ``self.ex_mtx``.
     """
     print("Using {} to weight the genes when running AUCell.".format(
         self.auc_regulon_weights_key))

     # One signature per regulon, weighted by the configured key.
     regulon_signatures = [
         GeneSignature(
             name=regulon.name,
             gene2weight=self.get_regulon_gene_data(
                 regulon, self.auc_regulon_weights_key),
         )
         for regulon in self.regulons
     ]

     # Result is (n_cells x n_regulons).
     enrichment_mtx = aucell(
         self.ex_mtx, regulon_signatures,
         num_workers=self.num_workers)

     # Align the rows with the expression matrix index.
     return enrichment_mtx.loc[self.ex_mtx.index]
예제 #13
0
    def signatures():
        """Yield one ``GeneSignature`` per gene signature file.

        Iterates over ``gene_sig_file_paths`` (taken from the enclosing
        scope). Each file is read as a headerless tab-separated table with
        1 or 2 columns: genes first, optional weights second. The signature
        name is the file's base name without extension.

        Files whose base name is ``regulons`` (the regulon frequency file)
        are skipped *before* being parsed, so an empty or differently
        formatted frequency file cannot abort the iteration.

        With a single column, or when ``noweights`` is set, the first column
        is passed on as-is. Otherwise rows are kept only if their weight is
        strictly greater than ``weight_threshold``; a file left with no rows
        is skipped, with a warning when ``show_warnings`` is set.

        Raises:
            Exception: if a file has 0 columns or more than 2 columns.
        """
        for gene_sig_file_path in gene_sig_file_paths:
            fname = ntpath.basename(gene_sig_file_path)
            regulon = os.path.splitext(fname)[0]

            # Check if the file is the regulon frequency file; skip it
            # without reading so its contents are never parsed.
            if regulon == 'regulons':
                continue

            gene_sig = pd.read_csv(gene_sig_file_path,
                                   sep='\t',
                                   header=None,
                                   index_col=None)

            # Do some sanity checks
            n_columns = len(gene_sig.columns)
            if n_columns == 0:
                raise Exception(
                    f"{gene_sig_file_path} has 0 columns. Requires .tsv with 1 or 2 columns. First column should be genes (required), second (optional) are weight for the given genes."
                )
            if n_columns > 2:
                raise Exception(
                    f"{gene_sig_file_path} has more than 2 columns. Requires .tsv with 1 or 2 columns. First column should be genes, second (optional) are weight for the given genes."
                )

            if n_columns == 1 or noweights:
                # Genes only: pass the first column through unweighted.
                gene2weight = gene_sig[0]
            else:
                # Exactly 2 columns and weights requested.
                # Filter the genes based on the given weight_threshold
                # 1st column: genes
                # 2nd column: weights
                gene_sig = gene_sig[gene_sig[1] > weight_threshold]
                if len(gene_sig.index) == 0:
                    if show_warnings:
                        warnings.warn(
                            "{0} is empty after apply filter with weight_threshold > {1}"
                            .format(regulon, weight_threshold))
                    continue
                gene2weight = [tuple(x) for x in gene_sig.values]
            yield GeneSignature(name=regulon, gene2weight=gene2weight)
예제 #14
0
    def doGeneSetEnrichment(self, request, context):
        """Run AUCell gene set enrichment and stream progress updates.

        This is a generator: each ``yield`` emits the value returned by
        ``gse.update_state(...)`` for one step. The steps are -1 (start),
        0 (read gene set), 1 / 2.1 / 2.2 (build and save rankings) or
        2 (load existing rankings), 3 (compute enrichment) and 4 (done,
        carrying the AUC values).

        Args:
            request: Provides ``geneSetFilePath`` (relative to the gene sets
                directory) and ``loomFilePath``.
            context: Not used by this method.
        """
        gene_set_file_path = os.path.join(self.dfh.get_gene_sets_dir(),
                                          request.geneSetFilePath)
        loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
        gse = _gse.GeneSetEnrichment(scope=self,
                                     method="AUCell",
                                     loom=loom,
                                     gene_set_file_path=gene_set_file_path,
                                     annotation='')

        # Running AUCell...
        yield gse.update_state(step=-1,
                               status_code=200,
                               status_message="Running AUCell...",
                               values=None)
        # NOTE(review): the one-second pauses presumably give the client time
        # to display each status -- confirm before removing.
        time.sleep(1)

        # Reading gene set...
        yield gse.update_state(step=0,
                               status_code=200,
                               status_message="Reading the gene set...",
                               values=None)
        with open(gse.gene_set_file_path, 'r') as f:
            # Skip first line because it contains the name of the signature
            gs = GeneSignature(name='Gene Signature #1',
                               gene2weight=[
                                   line.strip() for idx, line in enumerate(f)
                                   if idx > 0
                               ])
        time.sleep(1)

        if not gse.has_AUCell_rankings():
            # Creating the matrix as DataFrame...
            yield gse.update_state(step=1,
                                   status_code=200,
                                   status_message="Creating the matrix...",
                                   values=None)
            loom = self.lfh.get_loom(loom_file_path=request.loomFilePath)
            # Transposed so that rows are indexed by CellID and columns by gene.
            dgem = np.transpose(loom.get_connection()[:, :])
            ex_mtx = pd.DataFrame(data=dgem,
                                  index=loom.get_ca_attr_by_name("CellID"),
                                  columns=loom.get_genes())
            # Creating the rankings...
            start_time = time.time()
            yield gse.update_state(step=2.1,
                                   status_code=200,
                                   status_message="Creating the rankings...",
                                   values=None)
            rnk_mtx = create_rankings(ex_mtx=ex_mtx)
            # Saving the rankings...
            yield gse.update_state(step=2.2,
                                   status_code=200,
                                   status_message="Saving the rankings...",
                                   values=None)
            # DataFrame.as_matrix() was removed in pandas 1.0; .values gives
            # the same ndarray on old and new pandas versions.
            lp.create(gse.get_AUCell_ranking_filepath(), rnk_mtx.values,
                      {"CellID": loom.get_cell_ids()},
                      {"Gene": loom.get_genes()})
            print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
        else:
            # Load the rankings...
            yield gse.update_state(
                step=2,
                status_code=200,
                status_message="Rankings exists: loading...",
                values=None)
            rnk_loom = self.lfh.get_loom_connection(
                gse.get_AUCell_ranking_filepath())
            # The ranking file was written with cells as rows ("CellID" row
            # attribute) and genes as columns ("Gene" column attribute).
            rnk_mtx = pd.DataFrame(data=rnk_loom[:, :],
                                   index=rnk_loom.ra.CellID,
                                   columns=rnk_loom.ca.Gene)

        # Calculating AUCell enrichment...
        start_time = time.time()
        yield gse.update_state(
            step=3,
            status_code=200,
            status_message="Calculating AUCell enrichment...",
            values=None)
        aucs = enrichment(rnk_mtx, gs).loc[:, "AUC"].values

        print("Debug: %s seconds elapsed ---" % (time.time() - start_time))
        yield gse.update_state(step=4,
                               status_code=200,
                               status_message=gse.get_method() +
                               " enrichment done!",
                               values=aucs)
예제 #15
0
def test_diff1():
    """Check ``difference`` on two signatures built from plain gene lists.

    Only SOX4, the gene missing from the right-hand signature, is expected.
    """
    left = GeneSignature(name="test1", gene2weight=['TP53', 'SOX4'])
    right = GeneSignature(name="test1", gene2weight=['TP53', 'SOX2'])

    remainder = left.difference(right)

    assert 'SOX4' in remainder
    assert len(remainder) == 1
예제 #16
0
def test_immut():
    """Check that a signature rejects attribute rebinding and weight mutation."""
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    # Rebinding an attribute is expected to raise attrs' FrozenInstanceError.
    with pytest.raises(attr.exceptions.FrozenInstanceError):
        signature.name = 'rename'

    # Item assignment on the weight mapping is expected to raise TypeError.
    with pytest.raises(TypeError):
        signature.gene2weight['TP53'] = 0.6
예제 #17
0
def test_genes():
    """Check the ``genes`` attribute of a two-gene weighted signature.

    The expected tuple lists SOX4 (weight 0.75) before TP53 (weight 0.5).
    """
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})
    expected_genes = ('SOX4', 'TP53')
    assert signature.genes == expected_genes
예제 #18
0
def test_dict():
    """Check that indexing a signature by gene name returns that gene's weight."""
    signature = GeneSignature(name="test1", gene2weight={'TP53': 0.5, 'SOX4': 0.75})

    for gene, weight in (('TP53', 0.5), ('SOX4', 0.75)):
        assert signature[gene] == weight