Example #1
def bunch_upload_task(exp, block):
    seq = []
    for pheno_name, (es_mRNA_name, es_miRNA_name) in block.pheno_by_es_names.iteritems():
        es_mRNA_ufw = block.es_mRNA_matrices[es_mRNA_name]
        es_mRNA_df = es_mRNA_ufw.get_as_data_frame(block.csv_sep_m_rna)
        es_miRNA_ufw = None
        if es_miRNA_name is not None:
            es_miRNA_ufw = block.es_miRNA_matrices[es_miRNA_name]
        if es_miRNA_ufw is not None:
            es_miRNA_df = es_miRNA_ufw.get_as_data_frame(block.csv_sep_mi_rna)

        pheno_ufw = block.pheno_matrices[pheno_name]
        pheno_df = pheno_ufw.get_as_data_frame(block.csv_sep)

        es_mRNA, es_mRNA_df, gpl_file = process_data_frame(
            exp, block, es_mRNA_df, block.es_mRNA_matrices_ori,
            block.m_rna_platform, block.m_rna_unit, "m_rna")
        if es_miRNA_name is not None:
            es_miRNA, es_miRNA_df, gpl_file = process_data_frame(
                exp, block, es_miRNA_df, block.es_miRNA_matrices_ori,
                block.mi_rna_platform, block.mi_rna_unit, "mi_rna")

        pheno_df.set_index(pheno_df.columns[0], inplace=True)
        es_mRNA_sample_names = sorted(es_mRNA_df.index.tolist())
        if es_miRNA_name is not None:
            es_miRNA_sample_names = sorted(es_miRNA_df.index.tolist())

        pheno_sample_names = sorted(pheno_df.index.tolist())
        if es_mRNA_sample_names != pheno_sample_names:
            msg = "Couldn't match `%s` and `%s` due to different sample name sets" % (es_mRNA_name, pheno_name)
            AllUpdated(
                exp.pk,
                comment=msg,
                silent=False,
                mode=NotifyMode.ERROR
            ).send()
            raise RuntimeError(msg)
        if es_miRNA_name is not None:
            if es_miRNA_sample_names != pheno_sample_names:
                msg = "Couldn't match `%s` and `%s` due to different sample name sets" % (es_miRNA_name, pheno_name)
                AllUpdated(
                    exp.pk,
                    comment=msg,
                    silent=False,
                    mode=NotifyMode.ERROR
                ).send()
                raise RuntimeError(msg)

        es_mRNA.store_pheno_data_frame(pheno_df)
        if es_miRNA_name is not None:
            es_miRNA.store_pheno_data_frame(pheno_df)

        es_mRNA.pheno_metadata["user_class_title"] = pheno_df.columns[0]
        if es_miRNA_name is not None:
            es_miRNA.pheno_metadata["user_class_title"] = pheno_df.columns[0]
        else:
            es_miRNA = None
        seq.append({"mRNA_es": es_mRNA, "miRNA_es": es_miRNA, "__label__": es_mRNA_name})
    block.seq = seq
    return [block], {}
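
A minimal, hypothetical sketch of the sample-name check used above: after orientation, both the expression matrix (samples in rows) and the phenotype table must describe exactly the same sample set, otherwise the upload is rejected. The data below is illustrative.

import pandas as pd

# toy inputs: expression rows and phenotype rows are keyed by sample name
es_df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]],
                     index=["S1", "S2"], columns=["GeneA", "GeneB"])
pheno_df = pd.DataFrame({"User_class": ["case", "control"]}, index=["S1", "S2"])

# same comparison as in bunch_upload_task: sorted sample-name lists must match
if sorted(es_df.index.tolist()) != sorted(pheno_df.index.tolist()):
    raise RuntimeError("Couldn't match expression and phenotype sample name sets")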
Example #2
def upload_interaction_task(exp, block):
    AllUpdated(exp.pk,
               comment=u"Processing Upload Interaction block",
               silent=False,
               mode=NotifyMode.INFO).send()
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    # Convert to BinaryInteraction
    sep = getattr(block, "csv_sep", " ")
    if block.header:
        _header = 0
    else:
        _header = None
    interaction_df = block.upload_interaction.get_as_data_frame(sep=sep,
                                                                header=_header)
    sd = None
    # if self.bi_data_type in ["pairs", "triples", "pairs_diff", "triples_diff"]:
    # we have to find a shape of interaction matrix
    features_1 = interaction_df[interaction_df.columns[0]].tolist()
    features_2 = interaction_df[interaction_df.columns[1]].tolist()
    interactions = []
    if block.bi_data_type in ["triples", "triples_diff"]:
        interactions = zip(features_1, features_2,
                           interaction_df[interaction_df.columns[2]].tolist())
    else:
        interactions = zip(features_1, features_2, [1] * len(features_1))
    # new_inters = [expand_inters(inters_a, inters_b, value) for (inters_a, inters_b, value) in interactions]
    # new_inters = [item for sublist in new_inters for item in sublist] # flatten
    features_1 = [a for [a, _, _] in interactions]
    features_2 = [b for [_, b, _] in interactions]
    values = [c for [_, _, c] in interactions]
    interaction_df = pd.DataFrame()
    interaction_df[0] = features_1
    interaction_df[1] = features_2
    interaction_df[2] = values
    interaction = BinaryInteraction(exp.get_data_folder(), str(block.uuid))
    interaction.store_pairs(interaction_df, block.bi_data_type)
    interaction.x1_unit = getattr(block, "x1_unit", "")
    interaction.x2_unit = getattr(block, "x2_unit", "")
    interaction.header = block.header
    AllUpdated(exp.pk,
               comment=u"Processing of Upload Interaction block is done",
               silent=False,
               mode=NotifyMode.INFO).send()
    return [interaction], {}
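
A small illustration (with made-up data) of how the pairs/triples branch above normalizes the uploaded table into a three-column frame: pairs get a constant weight of 1, triples keep their third column as the weight.

import pandas as pd

raw = pd.DataFrame({"f1": ["g1", "g2"], "f2": ["m1", "m2"], "w": [0.5, 0.9]})
bi_data_type = "pairs"  # or "triples"

features_1 = raw[raw.columns[0]].tolist()
features_2 = raw[raw.columns[1]].tolist()
if bi_data_type in ["triples", "triples_diff"]:
    interactions = zip(features_1, features_2, raw[raw.columns[2]].tolist())
else:
    interactions = zip(features_1, features_2, [1] * len(features_1))

# columns 0, 1, 2 hold source feature, target feature and interaction weight
normalized = pd.DataFrame(list(interactions))
print(normalized)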
Example #3
    def execute(self, is_init_action=False):
        self.build_dag(
            self.exp.build_block_dependencies_by_scope(self.scope_name))

        blocks_to_execute = []
        working_blocks = []

        blocks_dict = dict(self.exp.get_blocks(self.exp.get_all_block_uuids()))
        for block_uuid in self.dag.topological_order:
            block = blocks_dict[block_uuid]
            if is_init_action and block.is_block_supports_auto_execution \
                    and block.get_exec_status() == "done":
                block.do_action("reset_execution", self.exp)

            if block.get_exec_status() == "ready" and \
                    self.is_block_inputs_are_satisfied(block_uuid, blocks_dict) and block.is_block_supports_auto_execution:
                blocks_to_execute.append(block)
            if block.get_exec_status() == "working" and \
                    self.is_block_inputs_are_satisfied(block_uuid, blocks_dict):
                working_blocks.append(block)

        if not blocks_to_execute and not working_blocks:
            log.debug("Nothing to execute in scope `%s` for exp `%s`",
                      self.scope_name, self.exp.pk)
            if self.scope_name != "root":
                block = self.exp.get_meta_block_by_sub_scope(self.scope_name)
                block.do_action("on_sub_scope_done", self.exp)
            else:
                AllUpdated(self.exp.pk,
                           comment=u"Workflow execution completed",
                           mode=NotifyMode.SUCCESS,
                           silent=False).send()
                self.exp.log("root", "Experiment finished.")
                self.exp.done()
        elif blocks_to_execute:
            # for block in blocks_to_execute:
            #     block.do_action("execute", self.exp)
            AllUpdated(self.exp.pk,
                       comment=u"Executing Block %s" %
                       blocks_to_execute[0].name.decode("utf-8"),
                       mode=NotifyMode.SUCCESS,
                       silent=False).send()
            blocks_to_execute[0].do_action("execute", self.exp)
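
The executor walks the blocks in a topological order of their dependency DAG, so a block is only considered after everything it depends on. A minimal, self-contained sketch of that ordering (independent of the project's own DAG class, which is not shown here):

from collections import deque

def topological_order(deps):
    """deps maps a block to the list of blocks it depends on (must run earlier)."""
    indegree = {b: len(parents) for b, parents in deps.items()}
    children = {b: [] for b in deps}
    for b, parents in deps.items():
        for p in parents:
            children[p].append(b)
    queue = deque(b for b, d in indegree.items() if d == 0)
    order = []
    while queue:
        b = queue.popleft()
        order.append(b)
        for c in children[b]:
            indegree[c] -= 1
            if indegree[c] == 0:
                queue.append(c)
    return order

# B and C depend on A; D depends on both B and C
print(topological_order({"A": [], "B": ["A"], "C": ["A"], "D": ["B", "C"]}))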
Example #4
def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    df.set_index(df.columns[0], inplace=True)
    # if the matrix is oriented genes-by-samples (GxS), transpose it to samples-by-genes
    if ori == "GxS":
        df = df.T
        # df.columns = df.iloc[0]
        # df = df.drop(df.index[0])
    # if isinstance(df.columns[0][0], basestring):
    gpl_file = None
    if platform:
        AllUpdated(exp.pk,
                   comment=u"Fetching platform %s" % platform,
                   silent=False,
                   mode=NotifyMode.INFO).send()
        gpl_file = fetch_geo_gpl(exp, block, platform)
        df, matched = convert_ids(gpl_file, df, data_type)
        AllUpdated(exp.pk,
                   comment=u"Matched %s features for %s dataset" %
                   (matched, data_type),
                   silent=False,
                   mode=NotifyMode.INFO).send()
        unit = 'RefSeq'
    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_%s_es" % (block.uuid, data_type))
    es.working_unit = unit
    es.store_assay_data_frame(df)
    return es, df, gpl_file
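
To make the orientation handling concrete: a GxS (genes-by-samples) upload is transposed so that rows become samples and columns become features, which is what the downstream sample-name checks expect. Toy data, for illustration only:

import pandas as pd

# hypothetical GxS upload: rows = genes, columns = samples
df = pd.DataFrame([[1, 2], [3, 4]], index=["GeneA", "GeneB"], columns=["S1", "S2"])

ori = "GxS"
if ori == "GxS":
    df = df.T  # now rows = samples, columns = genes

print(df.index.tolist())    # ['S1', 'S2']
print(df.columns.tolist())  # ['GeneA', 'GeneB']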
Example #5
    def get_assay_data_frame_for_platform(self, exp, platform):
        """
            @rtype: pd.DataFrame
        """
        if self.assay_data_storage is None:
            raise RuntimeError("Assay data wasn't set up prior to this call")
        p = set(platform)
        # cheap cache key: byte sum of the concatenated platform identifiers
        checksum = np.frombuffer("".join(platform), "uint8").sum()
        if checksum in self.df_platform:
            if self.df_platform[checksum]:
                if exp:
                    AllUpdated(exp.pk,
                               comment=u"Loading Expression Set from Cache",
                               silent=False,
                               mode=NotifyMode.INFO).send()
                return self.df_platform[checksum].load()
        if self.working_unit != 'RefSeq':
            if exp:
                AllUpdated(exp.pk,
                           comment=u"Converting unit %s to RefSeq" %
                           self.working_unit,
                           silent=False,
                           mode=NotifyMode.INFO).send()
            df = self.assay_data_storage.load()
            df, matched = convert_to_refseq(df, p)
            self.df_platform[checksum] = DataFrameStorage(
                filepath="%s/%s_%s_assay.csv.gz" %
                (self.base_dir, self.base_filename, checksum))
            self.df_platform[checksum].store(df)
            if exp:
                AllUpdated(exp.pk,
                           comment=u"Converted %s %s to RefSeq" %
                           (matched, self.working_unit),
                           silent=False,
                           mode=NotifyMode.INFO).send()
            return df
        return self.assay_data_storage.load()
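
The per-platform cache above is keyed by a byte-sum checksum of the concatenated platform identifiers. A small sketch of that key (a plain byte sum can collide for different platform lists, so it is only a cheap cache key, not a unique fingerprint; the .encode call makes the same idea work on Python 3 as well):

import numpy as np

platform = ["NM_000014", "NM_000015", "NM_000016"]

# same idea as in get_assay_data_frame_for_platform: sum of the raw bytes
checksum = np.frombuffer("".join(platform).encode("ascii"), dtype="uint8").sum()
print(checksum)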
Example #6
def halt_execution_task(exp, scope_name):
    log.debug("halt execution invoked")

    r = get_redis_instance()

    lock_key = ExpKeys.get_auto_exec_task_lock_key(exp.pk, scope_name)
    with redis_lock.Lock(r, lock_key):
        try:
            if scope_name == "root":
                AllUpdated(
                    exp.pk,
                    comment=u"An error occurred during experiment execution",
                    silent=False,
                    mode=NotifyMode.ERROR).send()
            else:
                block = exp.get_meta_block_by_sub_scope(scope_name)
                block.do_action("error", exp)
        except Exception as e:
            log.exception(e)
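
The lock around the error handling serializes all auto-execution bookkeeping for one scope, so concurrent Celery workers cannot race on the same experiment state. A minimal sketch of that pattern with the python-redis-lock API used above; the key string here is hypothetical, the real one comes from ExpKeys.get_auto_exec_task_lock_key:

import redis
import redis_lock

r = redis.StrictRedis()
lock_key = "auto_exec_task_lock:exp42:root"  # hypothetical key format

# only one worker at a time may update this scope's execution state
with redis_lock.Lock(r, lock_key):
    pass  # inspect / update experiment state here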
Example #7
def nimfa_snmnmf_task(exp,
                      block,
                      mRNA,
                      miRNA,
                      # DNAmethyl,
                      gene2gene,
                      miRNA2gene,
                      # gene2DNAmethylation,
                      params,
                      base_filename
):
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    ### factorization ###
    # initialize the factorization
    # mi_rna = mi_rna[mi_rna.columns[0:30]]
    #     m_rna = m_rna[m_rna.columns[0:50]]
    #
    # TODO FULL
    mRNA_matrix = mRNA.get_assay_data_frame()
    mRNA_matrix = mRNA_matrix[mRNA_matrix.columns[0:2500]]
    miRNA_matrix = miRNA.get_assay_data_frame()
    miRNA_matrix = miRNA_matrix[miRNA_matrix.columns[0:2500]]

    gene_platform = list(mRNA_matrix.columns)
    mi_rna_platform = list(miRNA_matrix.columns)
    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    g2g = gene2gene.get_matrix_for_platform(exp, gene_platform, identifiers=False)
    m2g = miRNA2gene.get_matrix_for_platform(exp, gene_platform, mi_rna_platform, identifiers=False, symmetrize=False)

    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix done",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    snm = ns.NIMFA_SNMNMF(mRNA=mRNA_matrix, miRNA=miRNA_matrix, DNAmethyl=None,
                          gene2gene=g2g,
                          miRNA2gene=m2g,
                          gene2DNAmethylation=None,
                          params=params)

    # run factorization
    try:
        snm.run(seed="random_c", rank=params['rank'], max_iter=5)
    except MFError as mfe:
        raise Exception(mfe.message)
    W = mRNA.clone(base_filename + "_W")
    W.store_assay_data_frame(snm.W)

    H1_miRNA = mRNA.clone(base_filename + "_H1_miRNA")
    H1_miRNA.store_assay_data_frame(snm.H1_miRNA)

    H2_genes = mRNA.clone(base_filename + "_H2_genes")
    H2_genes.store_assay_data_frame(snm.H2_genes)

    # get factorization performance evaluation
    #    perf = snm.performance.getResults()
    #   H1_perf = mRNA.clone(base_filename+"_H1_perf")
    #   H1_perf.store_assay_data_frame(perf['H0'])
    #   H2_perf = mRNA.clone(base_filename+"_H2_perf")
    #   H2_perf.store_assay_data_frame(perf['H1'])

    return [W, H1_miRNA, H2_genes,
            # H1_perf, H2_perf
           ], {}
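
The NIMFA_SNMNMF wrapper itself is project-specific, but the shape relationship of its outputs can be illustrated independently: SNMNMF approximates both expression matrices through a shared sample factor W, roughly mRNA ≈ W · H2 and miRNA ≈ W · H1, which is presumably why the task stores W, H1_miRNA and H2_genes. A toy numpy sketch with random data and a made-up rank (shapes only, no actual factorization):

import numpy as np

n_samples, n_genes, n_mirnas, rank = 10, 50, 20, 3
m_rna = np.random.rand(n_samples, n_genes)
mi_rna = np.random.rand(n_samples, n_mirnas)

# hypothetical factors with the shapes the task stores as W, H1_miRNA, H2_genes
W = np.random.rand(n_samples, rank)
H1 = np.random.rand(rank, n_mirnas)
H2 = np.random.rand(rank, n_genes)

print(np.dot(W, H2).shape == m_rna.shape)   # True
print(np.dot(W, H1).shape == mi_rna.shape)  # True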
Example #8
def aggregation_task(exp, block,
                     mode, c,
                     m_rna_es, mi_rna_es, interaction_matrix,
                     base_filename,
    ):
    """
        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet
        @type interaction_matrix: BinaryInteraction

    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    agg_func = svd_agg
    if mode == "SVD":
        agg_func = svd_agg
    elif mode == "SUB":
        agg_func = sub_agg

    inter_units = None
    m_rna = None
    if interaction_matrix.x1_unit == 'RefSeq':
        inter_units = interaction_matrix.load_pairs().iloc[:, 0].tolist()

    if inter_units:
        m_rna = m_rna_es.get_assay_data_frame_for_platform(exp, inter_units)
    else:
        m_rna = m_rna_es.get_assay_data_frame()

    mi_rna = mi_rna_es.get_assay_data_frame()
    gene_platform = list(m_rna.columns)
    mi_rna_platform = list(mi_rna.columns)
    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    targets_matrix = interaction_matrix.get_matrix_for_platform(exp, gene_platform, mi_rna_platform, symmetrize=False, identifiers=True)

    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix done",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    # targets_matrix = interaction_matrix.load_matrix()
    targets_matrix.columns = m_rna.columns
    targets_matrix.index = mi_rna.columns

    result_df = agg_func(m_rna, mi_rna, targets_matrix, c)
    result = m_rna_es.clone(base_filename)
    result.store_assay_data_frame(result_df)
    try:
        result.store_pheno_data_frame(mi_rna_es.get_pheno_data_frame())
    except RuntimeError:
        # the source ExpressionSet has no phenotype data attached; skip it
        pass
    return [result], {}
Example #9
def user_upload_complex_task(exp, block):
    sep_m_rna = getattr(block, "csv_sep_m_rna", " ")
    sep_mi_rna = getattr(block, "csv_sep_mi_rna", " ")
    sep_methyl = getattr(block, "csv_sep_methyl", " ")
    sep_pheno = getattr(block, "csv_sep_pheno", " ")

    AllUpdated(exp.pk,
               comment=u"Processing UserUploadComplex block",
               silent=False,
               mode=NotifyMode.INFO).send()

    if not block.pheno_matrix:
        block.warnings.append(Exception("Phenotype is undefined"))
        AllUpdated(exp.pk,
                   comment=u"Phenotype is undefined",
                   silent=False,
                   mode=NotifyMode.INFO).send()

        pheno_df = None
    else:
        pheno_df = block.pheno_matrix.get_as_data_frame(sep_pheno)
        pheno_df.set_index(pheno_df.columns[0], inplace=True)

        # TODO: solve somehow better: Here we add empty column with user class assignment
        pheno_df[ExpressionSet(None,
                               None).pheno_metadata["user_class_title"]] = ""

    m_rna_es = None
    mi_rna_es = None
    methyl_es = None
    if block.m_rna_matrix is not None:
        m_rna_assay_df = block.m_rna_matrix.get_as_data_frame(sep_m_rna)
        m_rna_es, m_rna_assay_df, gpl_file = process_data_frame(
            exp, block, m_rna_assay_df, block.m_rna_matrix_ori,
            block.m_rna_platform, block.m_rna_unit, "m_rna")
        block.m_rna_gpl_file = gpl_file

        if pheno_df is not None:
            m_rna_es.store_pheno_data_frame(pheno_df)
        m_rna_es.working_unit = block.m_rna_unit

    if block.mi_rna_matrix is not None:
        mi_rna_assay_df = block.mi_rna_matrix.get_as_data_frame(sep_mi_rna)
        mi_rna_es, mi_rna_assay_df, gpl_file = process_data_frame(
            exp, block, mi_rna_assay_df, block.mi_rna_matrix_ori,
            block.mi_rna_platform, block.mi_rna_unit, "mi_rna")
        block.mi_rna_gpl_file = gpl_file

        if pheno_df is not None:
            mi_rna_es.store_pheno_data_frame(pheno_df)
        mi_rna_es.working_unit = block.mi_rna_unit

    if block.methyl_matrix is not None:
        methyl_assay_df = block.methyl_matrix.get_as_data_frame(sep_methyl)
        # pass unit and data_type separately so the methyl set is not stored
        # under the default "m_rna" name
        methyl_es, methyl_assay_df, gpl_file = process_data_frame(
            exp, block, methyl_assay_df, block.methyl_matrix_ori,
            block.methyl_platform, getattr(block, "methyl_unit", None),
            "methyl")
        block.methyl_gpl_file = gpl_file

        if pheno_df is not None:
            methyl_es.store_pheno_data_frame(pheno_df)
        # methyl_es.working_unit = block.methyl_unit

    AllUpdated(exp.pk,
               comment=u"Finished processing of UserUploadComplex",
               silent=False,
               mode=NotifyMode.INFO).send()

    return [m_rna_es, mi_rna_es, methyl_es], {}
Example #10
def pattern_search(
        exp,
        block,
        m_rna_es,
        mi_rna_es,
        gene2gene,
        miRNA2gene,
        # gene_platform,
        # miRNA_platform,
        radius,
        min_imp,
        number_of_genes,
        metric,
        base_filename):
    """
        @type m_rna_es: ExpressionSet
        @type mi_rna_es: ExpressionSet
        @type gene2gene: BinaryInteraction
        @type miRNA2gene: BinaryInteraction
        @type radius: int
        @type min_imp: float
    """

    AllUpdated(exp.pk,
               comment=u"Initializing data...",
               silent=False,
               mode=NotifyMode.INFO).send()

    exp.log(block.uuid, "Initializing data...")

    mData = m_rna_es.get_assay_data_frame()
    gene_platform = list(mData.columns)
    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix",
               silent=False,
               mode=NotifyMode.INFO).send()

    gene2gene = gene2gene.get_matrix_for_platform(exp,
                                                  gene_platform,
                                                  symmetrize=True,
                                                  identifiers=False)

    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix done",
               silent=False,
               mode=NotifyMode.INFO).send()

    # TODO fix pattern search
    # if miRNA2gene is not None:
    #     miRNA2gene = miRNA2gene.load_matrix().T
    #     miRNA2gene = sp.coo_matrix(miRNA2gene.values)
    # if mi_rna_es is not None:
    #     miData = mi_rna_es.get_assay_data_frame()
    #     mir2gene = miRNA2gene
    #     mir2gene = sp.coo_matrix(mir2gene.values).T
    #     nw = mergeNetworks(gene2gene, mir2gene)
    # else:
    # gene2gene = gene2gene.load_matrix()
    # nw = sparse_df_to_saprse_matrix(gene2gene)
    nw = gene2gene.tocsr()
    # data = mData.ix[1:]
    data = mData
    data.set_index(data.columns[0], inplace=True, drop=True)

    data = zscore(data)
    pheno = m_rna_es.get_pheno_data_frame()
    classes = pheno['User_class'].values
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    exp.log(block.uuid, "Data ready. Running Pattern Search")
    seeds = np.random.choice(np.unique(nw.indices),
                             number_of_genes,
                             replace=False)
    # initialize the searcher object; metric=metric,
    searcher = DifferentialPatternSearcher(nw,
                                           radius=radius,
                                           min_improve=min_imp,
                                           seeds=seeds,
                                           base_dir="orig_interactions/",
                                           verbose=True)

    # the actual search
    res = searcher.search(data, classes)
    exp.log(block.uuid, "Pattern search finished.")

    # res ... a list of patterns,
    # i.e., for our purposes:
    comodule_set = map(
        lambda pattern: [gene_platform[gene] for gene in pattern.genes], res)

    # cs = ComoduleSet(exp.get_data_folder(), base_filename)
    gene_sets = GeneSets(exp.get_data_folder(),
                         "%s_ps_gene_sets" % str(block.uuid))
    result = {key: value for key, value in enumerate(comodule_set)}
    gs = GS(result, result)
    gene_sets.store_gs(gs)

    # self.set_out_var("gene_sets", gene_sets)
    # result = {key: value for key, value in enumerate(comodule_set)}
    # cs.store_set(result)
    # exp.log(block.uuid, "ComoduleSet stored.")

    return [gene_sets], {}
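
The zscore(data) step standardizes each feature before the search. A minimal equivalent in plain pandas, assuming column-wise standardization (the project's own zscore helper is not shown here):

import pandas as pd

def zscore_columns(df):
    # subtract the column mean, divide by the column standard deviation
    return (df - df.mean()) / df.std(ddof=0)

toy = pd.DataFrame({"GeneA": [1.0, 2.0, 3.0], "GeneB": [10.0, 20.0, 30.0]})
print(zscore_columns(toy))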
Example #11
    def get_matrix_for_platform(self,
                                exp,
                                gene_list,
                                mirna_list=None,
                                symmetrize=True,
                                identifiers=True,
                                tolower=False):
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost',
                            port=6901,
                            stdoutToServer=True,
                            stderrToServer=True)

        from collections import defaultdict
        from wrappers.input.utils import find_refseqs
        log.debug(gene_list)
        if mirna_list:
            log.debug(mirna_list)
        regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
        if len(
                filter(
                    lambda x: x is not None,
                    map(lambda x: re.match(regex, str(x), re.IGNORECASE),
                        gene_list))) < (len(gene_list) * 0.5):
            new_g = []
            for gene in gene_list:
                rf = list(find_refseqs(gene))
                if len(rf) > 0:
                    new_g.append(rf[0])
                if len(rf) == 0:
                    new_g.append(gene)
            gene_list = new_g
        hasht = dict(zip(gene_list, range(len(gene_list))))

        mirna_hasht = dict()
        if mirna_list is not None:
            new_g = []
            for gene in mirna_list:
                rf = list(find_refseqs(gene))
                if len(rf) > 0:
                    new_g.append(rf[0])
                else:
                    new_g.append(gene)
            mirna_list = new_g
            mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))

        inter_hash = defaultdict(list)
        interactions = self.load_pairs()
        cols = []
        rows = []
        log.debug("transforming interactions")
        for ix in range(len(interactions)):
            a, b, val = interactions.iloc[ix]
            if mirna_list is not None:
                if self.x2_unit == 'mirbase':
                    inter_hash[b].append([a, val])
                else:
                    inter_hash[a].append([b, val])
            else:
                inter_hash[a].append([b, val])
        if exp:
            AllUpdated(exp.pk,
                       comment=u"Transforming interaction matrix done",
                       silent=False,
                       mode=NotifyMode.INFO).send()
        log.debug("transformation of interactions done")
        count = 0
        counter2 = 0
        counter3 = 0
        counter4 = 0
        size_hash = len(inter_hash)
        if mirna_list is None:
            for key, value in inter_hash.iteritems():
                count += 1
                if count % 500 == 0:
                    log.debug("translating gene %d", count)
                    if exp:
                        AllUpdated(exp.pk,
                                   comment=u"Translating gene %s of %s" %
                                   (count, size_hash),
                                   silent=False,
                                   mode=NotifyMode.INFO).send()
                refseqs = find_refseqs(key)
                for refseq in refseqs:
                    counter2 += 1
                    if refseq not in hasht:
                        continue
                    if refseq in hasht:
                        for (gene, strength) in value:
                            # new_inters.append([(refseq, new_refseq, strength)
                            for new_refseq in find_refseqs(gene):
                                counter3 += 1
                                gi = refseq
                                gj = new_refseq
                                if gj not in hasht:
                                    continue
                                counter4 += 1
                                val = strength
                                if tolower:
                                    gi = gi.lower()
                                    gj = gj.lower()
                                cols.append(hasht[gi])
                                rows.append(hasht[gj])
        else:
            for key, value in inter_hash.iteritems():
                count += 1
                if count % 500 == 0:
                    log.debug("translating miRNA %d", count)
                    if exp:
                        AllUpdated(exp.pk,
                                   comment=u"Translating miRNA %s of %s" %
                                   (count, size_hash),
                                   silent=False,
                                   mode=NotifyMode.INFO).send()
                refseqs = find_refseqs(key)
                for refseq in refseqs:
                    counter2 += 1
                    if refseq not in mirna_hasht:
                        continue
                    if refseq in mirna_hasht:
                        for (gene, strength) in value:
                            for new_refseq in find_refseqs(gene):
                                counter3 += 1
                                gi = refseq
                                gj = new_refseq
                                if gj not in hasht:
                                    continue
                                counter4 += 1
                                val = strength
                                if tolower:
                                    gi = gi.lower()
                                    gj = gj.lower()
                                rows.append(mirna_hasht[gi])
                                cols.append(hasht[gj])
        # size = max(max(rows), max(cols)) + 1
        if exp:
            AllUpdated(exp.pk,
                       comment=u"%d interactions were found." % len(cols),
                       silent=False,
                       mode=NotifyMode.INFO).send()
        inters_matr = None
        # TODO fix for custom value of interactions
        if mirna_list is None:
            # inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), (size, size))
            inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                        (len(gene_list), len(gene_list)))
        else:
            inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                        (len(mirna_list), len(gene_list)))
            #inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), (max(rows) + 1, max(cols) + 1))

        if symmetrize:
            # make the matrix symmetric, then force every stored entry back to 1
            inters_matr = inters_matr + inters_matr.T
            inters_matr.data /= inters_matr.data

        if identifiers:
            inters_matr = inters_matr.tocsr()
            sparse_df = pd.SparseDataFrame([
                pd.SparseSeries(inters_matr[i].toarray().ravel())
                for i in np.arange(inters_matr.shape[0])
            ])
            # sparse_df = sparse_df.to_dense()
            if mirna_list is None:
                index = gene_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            else:
                index = mirna_list[:sparse_df.shape[0]]
                columns = gene_list[:sparse_df.shape[1]]
            if settings.CELERY_DEBUG:
                import sys
                sys.path.append(
                    '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
                )
                import pydevd
                pydevd.settrace('localhost',
                                port=6901,
                                stdoutToServer=True,
                                stderrToServer=True)

            # sparse_df['new_index'] = pd.Series(index, index=sparse_df.index)
            sparse_df.set_index([index], inplace=True)
            sparse_df.columns = columns
            return sparse_df
        return inters_matr
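
The sparse assembly and the symmetrize step at the end are worth isolating: interactions are collected as (row, col) index pairs, assembled into a COO matrix of ones, and M + M.T followed by data /= data makes the matrix symmetric while forcing every stored entry back to 1. A self-contained scipy sketch with toy indices:

import numpy as np
import scipy.sparse as sp

# (0, 1) and (1, 0) are both present, so their sum would be 2 before binarizing
rows = [0, 1, 2, 1]
cols = [1, 2, 0, 0]
n = 4  # hypothetical number of genes on the platform

inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)), (n, n))

# symmetrize, then binarize: entries that doubled up collapse back to 1
inters_matr = inters_matr + inters_matr.T
inters_matr.data /= inters_matr.data

print(inters_matr.toarray())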
Example #12
def filter_by_bi(exp, block, m_rna_es, mi_rna_es, interaction_matrix,
                 base_filename):
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    # m_rna_df = m_rna_es.get_assay_data_frame()
    # mi_rna_df = mi_rna_es.get_assay_data_frame()
    inter_units = None

    if interaction_matrix.x1_unit == 'RefSeq':
        inter_units = interaction_matrix.load_pairs().iloc[:, 0].tolist()

    if inter_units:
        m_rna_df = m_rna_es.get_assay_data_frame_for_platform(exp, inter_units)
    else:
        m_rna_df = m_rna_es.get_assay_data_frame()

    mi_rna_df = mi_rna_es.get_assay_data_frame()
    gene_platform = list(m_rna_df.columns)
    mi_rna_platform = list(mi_rna_df.columns)
    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix",
               silent=False,
               mode=NotifyMode.INFO).send()

    targets_matrix = interaction_matrix.get_matrix_for_platform(
        exp,
        gene_platform,
        mi_rna_platform,
        symmetrize=False,
        identifiers=True)

    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix done",
               silent=False,
               mode=NotifyMode.INFO).send()

    # targets_matrix = interaction_matrix.load_matrix()
    targets_matrix.columns = m_rna_df.columns
    targets_matrix.index = mi_rna_df.columns

    # allowed_m_rna_index_set = set(targets_matrix.columns) & set(m_rna_df.index)
    allowed_m_rna_index_set = set(targets_matrix.columns) & set(
        m_rna_df.columns)

    m_rna_df_filtered = m_rna_df.loc[:, allowed_m_rna_index_set]

    # allowed_mi_rna_index_set = set(targets_matrix.index) & set(mi_rna_df.index)
    allowed_mi_rna_index_set = set(targets_matrix.index) & set(
        mi_rna_df.columns)

    mi_rna_df_filtered = mi_rna_df.loc[:, allowed_mi_rna_index_set]

    #result_df = agg_func(m_rna, mi_rna, targets_matrix, c)
    m_rna_result = m_rna_es.clone(base_filename + "_mRNA")
    m_rna_result.store_assay_data_frame(m_rna_df_filtered)
    try:
        m_rna_result.store_pheno_data_frame(m_rna_es.get_pheno_data_frame())
    except RuntimeError:
        exp.log(block.uuid, "Phenotype not set")
        log.debug("Phenotype not set")
    mi_rna_result = mi_rna_es.clone(base_filename + "_miRNA")
    mi_rna_result.store_assay_data_frame(mi_rna_df_filtered)
    try:
        mi_rna_result.store_pheno_data_frame(mi_rna_es.get_pheno_data_frame())
    except RuntimeError:
        exp.log(block.uuid, "Phenotype not set")
        log.debug("Phenotype not set")
    return [m_rna_result, mi_rna_result], {}
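
The filtering itself reduces to a set intersection between the features named in the interaction matrix and the columns actually present in each expression frame. A toy pandas illustration (made-up feature names):

import pandas as pd

m_rna_df = pd.DataFrame([[1, 2, 3]], columns=["GeneA", "GeneB", "GeneC"])
targets_columns = ["GeneB", "GeneC", "GeneD"]  # features known to the interaction matrix

allowed = set(targets_columns) & set(m_rna_df.columns)
filtered = m_rna_df.loc[:, sorted(allowed)]  # keep only features covered by interactions
print(filtered.columns.tolist())  # ['GeneB', 'GeneC']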