Ejemplo n.º 1
0
def convert_to_refseq(assay_df, unit, data_type):
    """Rename the columns of ``assay_df`` using ``find_refseqs``.

    Each distinct column label is passed to ``find_refseqs``. When the
    result is truthy its first element becomes the new label; otherwise the
    label is kept unchanged. The frame is renamed in place.

    ``unit`` and ``data_type`` are accepted but not used by this function.

    Returns a tuple ``(assay_df, count)`` where ``count`` is the number of
    column labels for which ``find_refseqs`` returned a truthy result.
    """
    mapping = {}
    translated = 0
    for column in set(assay_df):
        hits = find_refseqs(column)
        if not hits:
            # Nothing found: map the label onto itself.
            mapping[column] = column
            continue
        mapping[column] = hits[0]
        translated += 1
    assay_df.rename(columns=mapping, inplace=True)
    return assay_df, translated
Ejemplo n.º 2
0
def convert_to_refseq(assay_df, platform):
    """Rename the columns of ``assay_df`` to identifiers found in ``platform``.

    For every distinct column label, ``find_refseqs`` is consulted:

    * no candidates at all -> the label is mapped onto itself;
    * candidates exist -> the first one contained in ``platform`` becomes the
      new label and is counted; if none is contained in ``platform`` the
      label gets no entry in the mapping and therefore stays as it is.

    The frame is renamed in place.

    Returns a tuple ``(assay_df, count)`` with ``count`` being the number of
    labels replaced by a candidate contained in ``platform``.
    """
    from wrappers.input.utils import find_refseqs

    mapping = {}
    matched = 0
    for column in set(assay_df):
        candidates = list(find_refseqs(column))
        if not candidates:
            mapping[column] = column
            continue
        for candidate in candidates:
            if candidate not in platform:
                continue
            # Only the first candidate known to the platform is used.
            mapping[column] = candidate
            matched += 1
            break
    assay_df.rename(columns=mapping, inplace=True)
    return assay_df, matched
Ejemplo n.º 3
0
def convert_to_refseq(assay_df, platform):
    """Translate column labels of ``assay_df`` to members of ``platform``.

    ``find_refseqs`` is called once per distinct column label. A label with
    no candidates is mapped onto itself. A label with candidates is mapped
    onto the first candidate that is contained in ``platform``; when no
    candidate qualifies, the label is not put into the mapping (so the
    rename leaves it alone).

    The rename happens in place. The return value is ``(assay_df, count)``,
    ``count`` being how many labels were mapped onto a platform member.
    """
    from wrappers.input.utils import find_refseqs

    # Unique marker so that any candidate value can be told apart from
    # "nothing matched".
    no_match = object()

    renames = {}
    hits = 0
    for label in set(assay_df):
        options = list(find_refseqs(label))
        if options:
            chosen = next((opt for opt in options if opt in platform), no_match)
            if chosen is not no_match:
                renames[label] = chosen
                hits += 1
        else:
            renames[label] = label
    assay_df.rename(columns=renames, inplace=True)
    return assay_df, hits
Ejemplo n.º 4
0
 def get_matrix_for_platform(self, exp, gene_list, mirna_list=None, symmetrize=True, tolower=False):
     """Build a sparse 0/1 interaction matrix indexed by platform positions.

     The pairs returned by ``self.load_pairs()`` are grouped by their first
     element. Both ends of every pair are translated with ``find_refseqs``
     and an entry is recorded for each translated combination whose ends are
     both known to the platform.

     Args:
         exp: object whose ``pk`` is passed to the ``AllUpdated`` progress
             notifications.
         gene_list: identifiers of the gene platform; the position of an
             identifier in this list is its matrix index.
         mirna_list: optional identifiers of the miRNA platform. When given,
             the first element of a pair is looked up here and becomes the
             row index, the second is looked up in ``gene_list`` and becomes
             the column index. When ``None`` both ends are looked up in
             ``gene_list``.
         symmetrize: when true, the matrix is added to its transpose and all
             stored values are reset to 1.
             NOTE(review): adding the transpose needs a square matrix; the
             miRNA matrix generally is not square - confirm callers pass
             ``symmetrize=False`` together with ``mirna_list``.
         tolower: when true, translated identifiers are lower-cased before
             the index lookup.
             NOTE(review): the membership test happens before lower-casing,
             so the lookup raises ``KeyError`` unless the lower-cased
             identifier is itself a key - confirm intended usage.

     Returns:
         A scipy sparse matrix. Without ``mirna_list`` it is square with side
         ``max(index) + 1``; with ``mirna_list`` it has
         ``max(row) + 1`` rows and ``max(col) + 1`` columns. When no
         interaction could be mapped at all, an empty matrix sized by the
         platform lists is returned instead of raising ``ValueError`` from
         ``max()`` on an empty list.
     """
     from collections import defaultdict
     from wrappers.input.utils import find_refseqs

     gene_mode = mirna_list is None
     hasht = dict(zip(gene_list, range(len(gene_list))))
     mirna_hasht = dict()
     if not gene_mode:
         mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))
     # Index used for the first element of every pair.
     source_index = hasht if gene_mode else mirna_hasht

     inter_hash = defaultdict(list)
     interactons = self.load_pairs()
     cols = []
     rows = []
     log.debug("transforming interactions")
     for ix in range(len(interactons)):
         a, b, val = interactons.iloc[ix]
         inter_hash[a].append([b, val])
     AllUpdated(
         exp.pk,
         comment=u"Transforming interaction matrix done",
         silent=False,
         mode=NotifyMode.INFO
     ).send()
     log.debug("transformation of interactions done")

     size_hash = len(inter_hash)
     for count, (key, value) in enumerate(inter_hash.items(), 1):
         if count % 500 == 0:
             log.debug("translating gene %d", count)
             AllUpdated(
                 exp.pk,
                 comment=u"Translating gene %s of %s" % (count, size_hash),
                 silent=False,
                 mode=NotifyMode.INFO
             ).send()
         for refseq in find_refseqs(key):
             if refseq not in source_index:
                 continue
             # The stored interaction strength is not used yet (see TODO below).
             for (gene, _strength) in value:
                 for new_refseq in find_refseqs(gene):
                     if new_refseq not in hasht:
                         continue
                     gi = refseq
                     gj = new_refseq
                     if tolower:
                         gi = gi.lower()
                         gj = gj.lower()
                     if gene_mode:
                         # gene-gene: source end -> column, target end -> row
                         cols.append(hasht[gi])
                         rows.append(hasht[gj])
                     else:
                         # miRNA-gene: miRNA -> row, gene -> column
                         rows.append(mirna_hasht[gi])
                         cols.append(hasht[gj])

     AllUpdated(
         exp.pk,
         comment=u"%d interactions were found." % len(cols),
         silent=False,
         mode=NotifyMode.INFO
     ).send()

     # TODO fix for custom value of interactions
     if rows:
         if gene_mode:
             size = max(max(rows), max(cols)) + 1
             shape = (size, size)
         else:
             shape = (max(rows) + 1, max(cols) + 1)
         inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), shape)
     else:
         # Nothing matched: max() would fail on the empty index lists, so
         # return an all-zero matrix with the platform's dimensions.
         if gene_mode:
             shape = (len(gene_list), len(gene_list))
         else:
             shape = (len(mirna_list), len(gene_list))
         inters_matr = sp.coo_matrix(shape)

     if symmetrize:
         inters_matr = inters_matr + inters_matr.T
         # Dividing the stored values by themselves resets every entry to 1.
         inters_matr.data /= inters_matr.data
     return inters_matr
Ejemplo n.º 5
0
    def get_matrix_for_platform(self,
                                exp,
                                gene_list,
                                mirna_list=None,
                                symmetrize=True,
                                identifiers=True,
                                tolower=False):
        """Build an interaction matrix aligned with the given platform lists.

        The pairs returned by ``self.load_pairs()`` are grouped, both ends of
        each pair are translated with ``find_refseqs`` and a 1 is stored for
        every translated combination whose ends are both present in the
        (possibly translated) platform lists.

        Args:
            exp: when truthy, ``exp.pk`` is used to send ``AllUpdated``
                progress notifications; when falsy no notification is sent.
            gene_list: gene platform identifiers. If fewer than half of them
                match ``regex`` below, every entry is replaced by the first
                ``find_refseqs`` result (entries without a result are kept).
            mirna_list: optional miRNA platform identifiers; each entry is
                replaced by its first ``find_refseqs`` result when there is
                one. When given, rows are miRNAs and columns are genes.
            symmetrize: when true, the matrix is added to its transpose and
                all stored values are reset to 1.
            identifiers: when true, a ``pd.SparseDataFrame`` labelled with
                the platform identifiers is returned instead of the raw
                sparse matrix.
            tolower: when true, translated identifiers are lower-cased
                before the index lookup.

        Returns:
            A ``pd.SparseDataFrame`` if ``identifiers`` is true, otherwise a
            scipy sparse matrix of shape ``(len(gene_list), len(gene_list))``
            or ``(len(mirna_list), len(gene_list))``.
        """
        # Optional remote-debugger attach, enabled through settings.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost',
                            port=6901,
                            stdoutToServer=True,
                            stderrToServer=True)

        from collections import defaultdict
        from wrappers.input.utils import find_refseqs
        log.debug(gene_list)
        if mirna_list:
            log.debug(mirna_list)
        # Two letters, an underscore, then letters/digits/dots (matched
        # case-insensitively) - presumably the RefSeq accession form; confirm.
        regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
        # NOTE(review): len(filter(...)) relies on filter() returning a list
        # (Python 2 behaviour).
        # Translate gene_list only when fewer than half of its entries match.
        if len(
                filter(
                    lambda x: x is not None,
                    map(lambda x: re.match(regex, str(x), re.IGNORECASE),
                        gene_list))) < (len(gene_list) * 0.5):
            new_g = []
            for gene in gene_list:
                rf = list(find_refseqs(gene))
                if len(rf) > 0:
                    new_g.append(rf[0])
                if len(rf) == 0:
                    # Keep untranslatable entries so positions stay aligned.
                    new_g.append(gene)
            gene_list = new_g
        # identifier -> position in gene_list
        hasht = dict(zip(gene_list, range(len(gene_list))))

        mirna_hasht = dict()
        if mirna_list is not None:
            # The miRNA list is always translated (no regex pre-check).
            new_g = []
            for gene in mirna_list:
                rf = list(find_refseqs(gene))
                if len(rf) > 0:
                    new_g.append(rf[0])
                else:
                    new_g.append(gene)
            mirna_list = new_g
            # identifier -> position in mirna_list
            mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))

        # first element of a pair -> list of [other element, value]
        inter_hash = defaultdict(list)
        interactons = self.load_pairs()
        cols = []
        rows = []
        log.debug("transforming interactions")
        for ix in range(len(interactons)):
            a, b, val = interactons.iloc[ix]
            if mirna_list is not None:
                # When the second unit is 'mirbase' the pair is stored
                # reversed, so the grouping key is the second element.
                if self.x2_unit == 'mirbase':
                    inter_hash[b].append([a, val])
                else:
                    inter_hash[a].append([b, val])
            else:
                inter_hash[a].append([b, val])
        if exp:
            AllUpdated(exp.pk,
                       comment=u"Transforming interaction matrix done",
                       silent=False,
                       mode=NotifyMode.INFO).send()
        log.debug("transformation of interactions done")
        # count drives the progress messages; counter2..counter4 are only
        # incremented and never read in this method.
        count = 0
        counter2 = 0
        counter3 = 0
        counter4 = 0
        size_hash = len(inter_hash)
        if mirna_list is None:
            # gene-gene: both ends are looked up in hasht.
            for key, value in inter_hash.iteritems():
                count += 1
                if count % 500 == 0:
                    log.debug("translating gene %d", count)
                    if exp:
                        AllUpdated(exp.pk,
                                   comment=u"Translating gene %s of %s" %
                                   (count, size_hash),
                                   silent=False,
                                   mode=NotifyMode.INFO).send()
                refseqs = find_refseqs(key)
                for refseq in refseqs:
                    counter2 += 1
                    if refseq not in hasht:
                        continue
                    if refseq in hasht:
                        for (gene, strength) in value:
                            # new_inters.append([(refseq, new_refseq, strength)
                            for new_refseq in find_refseqs(gene):
                                counter3 += 1
                                gi = refseq
                                gj = new_refseq
                                if gj not in hasht:
                                    continue
                                counter4 += 1
                                # val is assigned but not used afterwards;
                                # every stored entry is 1 (see TODO below).
                                val = strength
                                # NOTE(review): membership was tested before
                                # lower-casing, so the lookups below need the
                                # lower-cased identifier to be a key too.
                                if tolower:
                                    gi = gi.lower()
                                    gj = gj.lower()
                                cols.append(hasht[gi])
                                rows.append(hasht[gj])
        else:
            # miRNA-gene: the key is looked up in mirna_hasht (row), the
            # other end in hasht (column).
            for key, value in inter_hash.iteritems():
                count += 1
                if count % 500 == 0:
                    log.debug("translating miRNA %d", count)
                    if exp:
                        AllUpdated(exp.pk,
                                   comment=u"Translating miRNA %s of %s" %
                                   (count, size_hash),
                                   silent=False,
                                   mode=NotifyMode.INFO).send()
                refseqs = find_refseqs(key)
                for refseq in refseqs:
                    counter2 += 1
                    if refseq not in mirna_hasht:
                        continue
                    if refseq in mirna_hasht:
                        for (gene, strength) in value:
                            for new_refseq in find_refseqs(gene):
                                counter3 += 1
                                gi = refseq
                                gj = new_refseq
                                if gj not in hasht:
                                    continue
                                counter4 += 1
                                val = strength
                                if tolower:
                                    gi = gi.lower()
                                    gj = gj.lower()
                                rows.append(mirna_hasht[gi])
                                cols.append(hasht[gj])
        # size = max(max(rows), max(cols)) + 1
        if exp:
            AllUpdated(exp.pk,
                       comment=u"%d interactions were found." % len(cols),
                       silent=False,
                       mode=NotifyMode.INFO).send()
        inters_matr = None
        # TODO fix for custom value of interactions
        # The shape comes from the platform lists, so it does not depend on
        # which interactions were found.
        if mirna_list is None:
            # inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), (size, size))
            inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                        (len(gene_list), len(gene_list)))
        else:
            inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                        (len(mirna_list), len(gene_list)))
            #inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), (max(rows) + 1, max(cols) + 1))

        if symmetrize:
            # NOTE(review): adding the transpose needs a square matrix; the
            # miRNA matrix is len(mirna_list) x len(gene_list) - confirm
            # callers pass symmetrize=False together with mirna_list.
            inters_matr = inters_matr + inters_matr.T
            # Dividing the stored values by themselves resets each one to 1.
            inters_matr.data /= inters_matr.data

        if identifiers:
            # Convert row by row into a sparse data frame, then label it.
            inters_matr = inters_matr.tocsr()
            sparse_df = pd.SparseDataFrame([
                pd.SparseSeries(inters_matr[i].toarray().ravel())
                for i in np.arange(inters_matr.shape[0])
            ])
            # sparse_df = sparse_df.to_dense()
            if mirna_list is None:
                index = gene_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            else:
                index = mirna_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            # Second optional debugger attach, same settings flag as above.
            if settings.CELERY_DEBUG:
                import sys
                sys.path.append(
                    '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
                )
                import pydevd
                pydevd.settrace('localhost',
                                port=6901,
                                stdoutToServer=True,
                                stderrToServer=True)

            # sparse_df['new_index'] = pd.Series(index, index=sparse_df.index)
            sparse_df.set_index([index], inplace=True)
            sparse_df.columns = columns
            return sparse_df
        return inters_matr
Ejemplo n.º 6
0
    def get_matrix_for_platform(self, exp, gene_list, mirna_list=None, symmetrize=True, identifiers=True,
                                tolower=False):
        """Build an interaction matrix aligned with the given platform lists.

        The pairs returned by ``self.load_pairs()`` are grouped, both ends of
        each pair are translated with ``find_refseqs`` and a 1 is stored for
        every translated combination whose ends are both known to the
        platform.

        Args:
            exp: object whose ``pk`` is passed to the ``AllUpdated`` progress
                notifications.
            gene_list: gene platform identifiers. If fewer than half of them
                match ``regex`` below, the list is replaced by the first
                ``find_refseqs`` result of every entry.
                NOTE(review): entries without any result are dropped here,
                which shifts the positions of the remaining ones - confirm
                this is intended.
            mirna_list: optional miRNA platform identifiers. When given, rows
                are miRNAs and columns are genes.
            symmetrize: when true, the matrix is added to its transpose and
                all stored values are reset to 1.
                NOTE(review): adding the transpose needs a square matrix; the
                miRNA matrix generally is not square - confirm callers pass
                ``symmetrize=False`` together with ``mirna_list``.
            identifiers: when true, a dense data frame labelled with the
                platform identifiers is returned instead of the sparse matrix.
            tolower: when true, translated identifiers are lower-cased before
                the index lookup.

        Returns:
            A labelled data frame if ``identifiers`` is true, otherwise a
            scipy sparse matrix. The matrix is sized by the largest index
            found; when no interaction could be mapped at all it is an
            all-zero matrix sized by the platform lists (instead of raising
            ``ValueError`` from ``max()`` on an empty list).
        """
        # Optional remote-debugger attach, enabled through settings.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
            import pydevd
            pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

        from collections import defaultdict
        from wrappers.input.utils import find_refseqs
        log.debug(gene_list)
        if mirna_list:
            log.debug(mirna_list)
        regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
        # Count matching identifiers without relying on filter() returning a
        # list.
        matching = sum(1 for g in gene_list if re.match(regex, str(g), re.IGNORECASE) is not None)
        if matching < (len(gene_list) * 0.5):
            new_g = []
            for gene in gene_list:
                rf = list(find_refseqs(gene))
                if len(rf) > 0:
                    new_g.append(rf[0])
            gene_list = new_g

        gene_mode = mirna_list is None
        # identifier -> position in the respective platform list
        hasht = dict(zip(gene_list, range(len(gene_list))))
        mirna_hasht = dict()
        if not gene_mode:
            mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))
        # Index used for the grouping key of every pair.
        source_index = hasht if gene_mode else mirna_hasht

        inter_hash = defaultdict(list)
        interactons = self.load_pairs()
        cols = []
        rows = []
        log.debug("transforming interactions")
        for ix in range(len(interactons)):
            a, b, val = interactons.iloc[ix]
            # When the second unit is 'mirbase' the pair is stored reversed,
            # so the grouping key is the second element.
            if not gene_mode and self.x2_unit == 'mirbase':
                inter_hash[b].append([a, val])
            else:
                inter_hash[a].append([b, val])
        AllUpdated(
            exp.pk,
            comment=u"Transforming interaction matrix done",
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        log.debug("transformation of interactions done")

        # Progress messages differ only in the entity name.
        if gene_mode:
            debug_fmt = "translating gene %d"
            comment_fmt = u"Translating gene %s of %s"
        else:
            debug_fmt = "translating miRNA %d"
            comment_fmt = u"Translating miRNA %s of %s"

        size_hash = len(inter_hash)
        for count, (key, value) in enumerate(inter_hash.items(), 1):
            if count % 500 == 0:
                log.debug(debug_fmt, count)
                AllUpdated(
                    exp.pk,
                    comment=comment_fmt % (count, size_hash),
                    silent=False,
                    mode=NotifyMode.INFO
                ).send()
            for refseq in find_refseqs(key):
                if refseq not in source_index:
                    continue
                # The stored interaction strength is not used yet (see TODO below).
                for (gene, _strength) in value:
                    for new_refseq in find_refseqs(gene):
                        if new_refseq not in hasht:
                            continue
                        gi = refseq
                        gj = new_refseq
                        # NOTE(review): membership was tested before
                        # lower-casing, so the lookups below need the
                        # lower-cased identifier to be a key too.
                        if tolower:
                            gi = gi.lower()
                            gj = gj.lower()
                        if gene_mode:
                            # gene-gene: source end -> column, target end -> row
                            cols.append(hasht[gi])
                            rows.append(hasht[gj])
                        else:
                            # miRNA-gene: miRNA -> row, gene -> column
                            rows.append(mirna_hasht[gi])
                            cols.append(hasht[gj])

        AllUpdated(
            exp.pk,
            comment=u"%d interactions were found." % len(cols),
            silent=False,
            mode=NotifyMode.INFO
        ).send()

        # TODO fix for custom value of interactions
        if rows:
            if gene_mode:
                size = max(max(rows), max(cols)) + 1
                shape = (size, size)
            else:
                shape = (max(rows) + 1, max(cols) + 1)
            inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), shape)
        else:
            # Nothing matched: max() would fail on the empty index lists, so
            # build an all-zero matrix with the platform's dimensions.
            if gene_mode:
                shape = (len(gene_list), len(gene_list))
            else:
                shape = (len(mirna_list), len(gene_list))
            inters_matr = sp.coo_matrix(shape)

        if symmetrize:
            inters_matr = inters_matr + inters_matr.T
            # Dividing the stored values by themselves resets each one to 1.
            inters_matr.data /= inters_matr.data

        if identifiers:
            # Convert row by row into a data frame, then label it.
            inters_matr = inters_matr.tocsr()
            sparse_df = pd.SparseDataFrame([pd.SparseSeries(inters_matr[i].toarray().ravel())
                                            for i in np.arange(inters_matr.shape[0])])
            sparse_df = sparse_df.to_dense()
            if gene_mode:
                index = gene_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            else:
                index = mirna_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            # The identifiers go into a helper column that then becomes the
            # index, so the column count is unchanged when columns are set.
            sparse_df['new_index'] = pd.Series(index, index=sparse_df.index)
            sparse_df.set_index(['new_index'], inplace=True)
            sparse_df.columns = columns
            return sparse_df
        return inters_matr