def _check_sample_names_match(exp, es_name, pheno_name, es_sample_names, pheno_sample_names):
    # Abort the upload when an assay matrix and the phenotype matrix describe
    # different sample sets -- downstream joins on sample names would be wrong.
    # Notifies the experiment UI before raising.
    if es_sample_names != pheno_sample_names:
        msg = "Couldn't match `%s` and `%s` due to different sample name sets" % (es_name, pheno_name)
        AllUpdated(
            exp.pk,
            comment=msg,
            silent=False,
            mode=NotifyMode.ERROR
        ).send()
        raise RuntimeError(msg)


def bunch_upload_task(exp, block):
    """Process a bunch of uploaded (mRNA, optional miRNA, phenotype) matrices.

    For every phenotype name in ``block.pheno_by_es_names`` the matching
    mRNA (and, when present, miRNA) matrix is normalized through
    ``process_data_frame``, validated against the phenotype's sample names,
    and the phenotype frame is attached to each resulting ExpressionSet.

    Returns ([block], {}) with ``block.seq`` set to a list of dicts
    {"mRNA_es": ..., "miRNA_es": ... or None, "__label__": mRNA matrix name}.

    Raises RuntimeError when sample name sets do not match.
    """
    seq = []
    for pheno_name, (es_mRNA_name, es_miRNA_name) in block.pheno_by_es_names.iteritems():
        es_mRNA_df = block.es_mRNA_matrices[es_mRNA_name].get_as_data_frame(block.csv_sep_m_rna)
        pheno_df = block.pheno_matrices[pheno_name].get_as_data_frame(block.csv_sep)

        es_mRNA, es_mRNA_df, gpl_file = process_data_frame(
            exp, block, es_mRNA_df, block.es_mRNA_matrices_ori,
            block.m_rna_platform, block.m_rna_unit, "m_rna")

        # miRNA is optional per triple; keep None when not supplied.
        es_miRNA = None
        es_miRNA_df = None
        if es_miRNA_name is not None:
            es_miRNA_ufw = block.es_miRNA_matrices[es_miRNA_name]
            if es_miRNA_ufw is not None:
                es_miRNA_df = es_miRNA_ufw.get_as_data_frame(block.csv_sep_mi_rna)
            es_miRNA, es_miRNA_df, gpl_file = process_data_frame(
                exp, block, es_miRNA_df, block.es_miRNA_matrices_ori,
                block.mi_rna_platform, block.mi_rna_unit, "mi_rna")

        pheno_df.set_index(pheno_df.columns[0], inplace=True)
        pheno_sample_names = sorted(pheno_df.index.tolist())

        # Validate BOTH matrices before storing anything, so a mismatch
        # leaves no partially-populated ExpressionSet behind.
        _check_sample_names_match(
            exp, es_mRNA_name, pheno_name,
            sorted(es_mRNA_df.index.tolist()), pheno_sample_names)
        if es_miRNA is not None:
            _check_sample_names_match(
                exp, es_miRNA_name, pheno_name,
                sorted(es_miRNA_df.index.tolist()), pheno_sample_names)

        es_mRNA.store_pheno_data_frame(pheno_df)
        es_mRNA.pheno_metadata["user_class_title"] = pheno_df.columns[0]
        if es_miRNA is not None:
            es_miRNA.store_pheno_data_frame(pheno_df)
            es_miRNA.pheno_metadata["user_class_title"] = pheno_df.columns[0]

        seq.append({"mRNA_es": es_mRNA,
                    "miRNA_es": es_miRNA,
                    "__label__": es_mRNA_name})
    block.seq = seq
    return [block], {}
def upload_interaction_task(exp, block):
    """Convert an uploaded interaction file into a stored BinaryInteraction.

    Reads the uploaded CSV (separator and header-row presence taken from the
    block), builds a 3-column frame [feature_1, feature_2, value] and stores
    it as pairs. For "pairs"/"pairs_diff" data types the value column is
    filled with the constant 1.

    Returns ([BinaryInteraction], {}).
    """
    AllUpdated(exp.pk,
               comment=u"Processing Upload Interaction block",
               silent=False,
               mode=NotifyMode.INFO).send()
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    sep = getattr(block, "csv_sep", " ")
    # pandas: header=0 means "first row is the header", None means "no header".
    _header = 0 if block.header else None
    interaction_df = block.upload_interaction.get_as_data_frame(sep=sep, header=_header)

    features_1 = interaction_df[interaction_df.columns[0]].tolist()
    features_2 = interaction_df[interaction_df.columns[1]].tolist()
    if block.bi_data_type in ["triples", "triples_diff"]:
        # Triples carry an explicit interaction value in the third column.
        values = interaction_df[interaction_df.columns[2]].tolist()
    else:
        # Plain pairs: every listed interaction gets unit strength.
        values = [1] * len(features_1)

    # Rebuild a canonical 3-column frame regardless of the input layout.
    interaction_df = pd.DataFrame()
    interaction_df[0] = features_1
    interaction_df[1] = features_2
    interaction_df[2] = values

    interaction = BinaryInteraction(exp.get_data_folder(), str(block.uuid))
    interaction.store_pairs(interaction_df, block.bi_data_type)
    interaction.x1_unit = getattr(block, "x1_unit", "")
    interaction.x2_unit = getattr(block, "x2_unit", "")
    interaction.header = block.header
    AllUpdated(exp.pk,
               comment=u"Processing of Upload Interaction block is done",
               silent=False,
               mode=NotifyMode.INFO).send()
    return [interaction], {}
def execute(self, is_init_action=False):
    """Advance auto-execution of the blocks in this scope by one step.

    Walks blocks in topological (dependency) order, collects those that are
    ready to run and those still working, then either dispatches the first
    ready block, or -- when nothing is ready or working -- finishes the scope
    (notifying the parent meta-block, or completing the experiment for the
    "root" scope).

    :param is_init_action: when True, previously-finished auto-executable
        blocks are reset first so the whole scope re-runs from scratch.
    """
    self.build_dag(
        self.exp.build_block_dependencies_by_scope(self.scope_name))
    blocks_to_execute = []
    working_blocks = []
    blocks_dict = dict(self.exp.get_blocks(self.exp.get_all_block_uuids()))
    for block_uuid in self.dag.topological_order:
        block = blocks_dict[block_uuid]
        # Re-run support: a "done" block is reset BEFORE its status is
        # inspected below, so it can be picked up as "ready" in this pass.
        if is_init_action and block.is_block_supports_auto_execution and block.get_exec_status(
        ) == "done":
            block.do_action("reset_execution", self.exp)
        if block.get_exec_status() == "ready" and \
                self.is_block_inputs_are_satisfied(block_uuid, blocks_dict) and block.is_block_supports_auto_execution:
            blocks_to_execute.append(block)
        if block.get_exec_status() == "working" and \
                self.is_block_inputs_are_satisfied(block_uuid, blocks_dict):
            working_blocks.append(block)
    if not blocks_to_execute and not working_blocks:
        # Nothing runnable and nothing in flight: the scope is finished.
        log.debug("Nothing to execute in scope `%s` for exp `%s`",
                  self.scope_name, self.exp.pk)
        if self.scope_name != "root":
            # Sub-scope: bubble completion up to the enclosing meta-block.
            block = self.exp.get_meta_block_by_sub_scope(self.scope_name)
            block.do_action("on_sub_scope_done", self.exp)
        else:
            AllUpdated(self.exp.pk,
                       comment=u"Workflow execution completed",
                       mode=NotifyMode.SUCCESS,
                       silent=False).send()
            self.exp.log("root", "Experiment finished.")
            self.exp.done()
    elif blocks_to_execute:
        # Only ONE block is dispatched per call; the next call (triggered by
        # that block finishing) picks up the following ready block.
        AllUpdated(self.exp.pk,
                   comment=u"Executing Block %s" % blocks_to_execute[0].name.decode("utf-8"),
                   mode=NotifyMode.SUCCESS,
                   silent=False).send()
        blocks_to_execute[0].do_action("execute", self.exp)
def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    """Normalize an uploaded assay matrix and wrap it in an ExpressionSet.

    The first column becomes the index; a genes-by-samples ("GxS") matrix is
    transposed to samples-by-genes. When a GEO platform is given, feature
    identifiers are converted (forcing the working unit to 'RefSeq').

    Returns (ExpressionSet, processed DataFrame, GPL file or None).
    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    df.set_index(df.columns[0], inplace=True)
    # Matrices uploaded genes-by-samples are transposed so that rows are samples.
    if ori == "GxS":
        df = df.T

    platform_annotation = None
    if platform:
        AllUpdated(exp.pk,
                   comment=u"Fetching platform %s" % platform,
                   silent=False,
                   mode=NotifyMode.INFO).send()
        platform_annotation = fetch_geo_gpl(exp, block, platform)
        df, n_matched = convert_ids(platform_annotation, df, data_type)
        AllUpdated(exp.pk,
                   comment=u"Matched %s features for %s dataset" % (n_matched, data_type),
                   silent=False,
                   mode=NotifyMode.INFO).send()
        # Identifier conversion leaves the matrix in RefSeq units.
        unit = 'RefSeq'

    expression_set = ExpressionSet(
        base_dir=exp.get_data_folder(),
        base_filename="%s_%s_es" % (block.uuid, data_type))
    expression_set.working_unit = unit
    expression_set.store_assay_data_frame(df)
    return expression_set, df, platform_annotation
def get_assay_data_frame_for_platform(self, exp, platform):
    """Return the assay matrix converted to RefSeq for the given platform.

    Results are cached per-platform in ``self.df_platform`` keyed by a
    checksum of the platform identifiers, with the converted frame persisted
    to a gzipped CSV next to the assay data. If the working unit is already
    'RefSeq', the raw assay frame is returned unchanged.

    @rtype: pd.DataFrame
    """
    if self.assay_data_storage is None:
        raise RuntimeError("Assay data wasn't setup prior")
    p = set(platform)
    # Cache key: byte-sum of the concatenated identifiers. NOTE(review):
    # this is order-insensitive and collision-prone for distinct platforms
    # with equal byte sums -- kept as-is because stored cache filenames
    # embed this value.
    checksum = np.frombuffer("".join(platform), "uint8").sum()
    if checksum in self.df_platform:
        if self.df_platform[checksum]:
            if exp:
                AllUpdated(exp.pk,
                           comment=u"Loading Expression Set from Cache",
                           silent=False,
                           mode=NotifyMode.INFO).send()
            return self.df_platform[checksum].load()
    if self.working_unit != 'RefSeq':
        if exp:
            AllUpdated(exp.pk,
                       comment=u"Converting unit %s to RefSeq" % self.working_unit,
                       silent=False,
                       mode=NotifyMode.INFO).send()
        df = self.assay_data_storage.load()
        df, matched = convert_to_refseq(df, p)
        # Persist the converted frame and register it in the cache.
        self.df_platform[checksum] = DataFrameStorage(
            filepath="%s/%s_%s_assay.csv.gz" % (self.base_dir, self.base_filename, checksum))
        self.df_platform[checksum].store(df)
        if exp:
            AllUpdated(exp.pk,
                       comment=u"Converted %s %s to RefSeq" % (matched, self.working_unit),
                       silent=False,
                       mode=NotifyMode.INFO).send()
        return df
    # Already in RefSeq units: no conversion needed.
    return self.assay_data_storage.load()
def halt_execution_task(exp, scope_name):
    """Stop execution of a scope after an error, under the scope's redis lock.

    For the "root" scope the user is notified that the experiment failed;
    for a sub-scope the enclosing meta-block receives the "error" action.
    Any exception raised while halting is logged and swallowed so the task
    itself never fails.
    """
    log.debug("halt execution invoked")
    r = get_redis_instance()
    lock_key = ExpKeys.get_auto_exec_task_lock_key(exp.pk, scope_name)
    # Same lock as the auto-exec task, so we never race a running scheduler.
    with redis_lock.Lock(r, lock_key):
        try:
            if scope_name == "root":
                AllUpdated(
                    exp.pk,
                    comment=u"An error occurred during experiment execution",
                    silent=False,
                    mode=NotifyMode.ERROR).send()
            else:
                block = exp.get_meta_block_by_sub_scope(scope_name)
                block.do_action("error", exp)
        # `except ... as e` instead of the Python-2-only comma syntax.
        except Exception as e:
            log.exception(e)
def nimfa_snmnmf_task(exp, block,
                      mRNA, miRNA,  # DNAmethyl,
                      gene2gene, miRNA2gene,  # gene2DNAmethylation,
                      params, base_filename):
    """Run SNMNMF joint factorization of mRNA and miRNA expression.

    Builds gene-gene and miRNA-gene interaction matrices for the platforms
    present in the expression data, runs NIMFA SNMNMF, and stores the W,
    H1_miRNA and H2_genes factor matrices as clones of the mRNA set.

    Returns ([W, H1_miRNA, H2_genes], {}).
    Raises Exception when the factorization itself fails.
    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    # TODO FULL: only the first 2500 features of each matrix are used.
    mRNA_matrix = mRNA.get_assay_data_frame()
    mRNA_matrix = mRNA_matrix[mRNA_matrix.columns[0:2500]]
    miRNA_matrix = miRNA.get_assay_data_frame()
    miRNA_matrix = miRNA_matrix[miRNA_matrix.columns[0:2500]]
    gene_platform = list(mRNA_matrix.columns)
    mi_rna_platform = list(miRNA_matrix.columns)

    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix",
        silent=False,
        mode=NotifyMode.INFO
    ).send()
    g2g = gene2gene.get_matrix_for_platform(exp, gene_platform, identifiers=False)
    m2g = miRNA2gene.get_matrix_for_platform(
        exp, gene_platform, mi_rna_platform, identifiers=False, symmetrize=False)
    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix done",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    snm = ns.NIMFA_SNMNMF(mRNA=mRNA_matrix, miRNA=miRNA_matrix, DNAmethyl=None,
                          gene2gene=g2g, miRNA2gene=m2g,
                          gene2DNAmethylation=None, params=params)
    try:
        snm.run(seed="random_c", rank=params['rank'], max_iter=5)
    except MFError as mfe:
        # str(mfe) instead of the deprecated (removed in py3) .message attribute.
        raise Exception(str(mfe))

    W = mRNA.clone(base_filename + "_W")
    W.store_assay_data_frame(snm.W)
    H1_miRNA = mRNA.clone(base_filename + "_H1_miRNA")
    H1_miRNA.store_assay_data_frame(snm.H1_miRNA)
    H2_genes = mRNA.clone(base_filename + "_H2_genes")
    H2_genes.store_assay_data_frame(snm.H2_genes)
    return [W, H1_miRNA, H2_genes], {}
def aggregation_task(exp, block,
                     mode, c,
                     m_rna_es, mi_rna_es, interaction_matrix,
                     base_filename,
                     ):
    """Aggregate mRNA and miRNA expression through an interaction matrix.

    @type m_rna_es: ExpressionSet
    @type mi_rna_es: ExpressionSet
    @type interaction_matrix: BinaryInteraction

    :param mode: "SVD" or "SUB" -- selects the aggregation function
        (defaults to SVD aggregation for any other value).
    :param c: aggregation coefficient forwarded to the aggregation function.
    Returns ([ExpressionSet], {}) with the aggregated matrix.
    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    agg_func = svd_agg
    if mode == "SVD":
        agg_func = svd_agg
    elif mode == "SUB":
        agg_func = sub_agg

    # When the interaction pairs are RefSeq-keyed, convert the mRNA assay
    # frame to the same identifier space before aggregating.
    inter_units = None
    m_rna = None
    if interaction_matrix.x1_unit == 'RefSeq':
        inter_units = interaction_matrix.load_pairs().iloc[:, 0].tolist()
    if inter_units:
        m_rna = m_rna_es.get_assay_data_frame_for_platform(exp, inter_units)
    else:
        m_rna = m_rna_es.get_assay_data_frame()
    mi_rna = mi_rna_es.get_assay_data_frame()
    gene_platform = list(m_rna.columns)
    mi_rna_platform = list(mi_rna.columns)

    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix",
        silent=False,
        mode=NotifyMode.INFO
    ).send()
    targets_matrix = interaction_matrix.get_matrix_for_platform(
        exp, gene_platform, mi_rna_platform, symmetrize=False, identifiers=True)
    AllUpdated(
        exp.pk,
        comment=u"Transforming interaction matrix done",
        silent=False,
        mode=NotifyMode.INFO
    ).send()

    # Align the targets matrix axes with the expression frames.
    targets_matrix.columns = m_rna.columns
    targets_matrix.index = mi_rna.columns

    result_df = agg_func(m_rna, mi_rna, targets_matrix, c)

    result = m_rna_es.clone(base_filename)
    result.store_assay_data_frame(result_df)
    try:
        result.store_pheno_data_frame(mi_rna_es.get_pheno_data_frame())
    except RuntimeError:
        # Phenotype is optional; log instead of silently swallowing
        # (consistent with filter_by_bi).
        exp.log(block.uuid, "Phenotype not set")
        log.debug("Phenotype not set")
    return [result], {}
def user_upload_complex_task(exp, block):
    """Process a complex user upload: mRNA, miRNA and/or methylation matrices.

    Each present matrix is normalized through ``process_data_frame``; the
    optional phenotype matrix (with an empty user-class column appended) is
    attached to every produced ExpressionSet.

    Returns ([m_rna_es, mi_rna_es, methyl_es], {}) where absent inputs
    yield None in the corresponding slot.
    """
    sep_m_rna = getattr(block, "csv_sep_m_rna", " ")
    sep_mi_rna = getattr(block, "csv_sep_mi_rna", " ")
    sep_methyl = getattr(block, "csv_sep_methyl", " ")
    sep_pheno = getattr(block, "csv_sep_pheno", " ")
    AllUpdated(exp.pk,
               comment=u"Processing UserUploadComplex block",
               silent=False,
               mode=NotifyMode.INFO).send()
    if not block.pheno_matrix:
        block.warnings.append(Exception("Phenotype is undefined"))
        AllUpdated(exp.pk,
                   comment=u"Phenotype is undefined",
                   silent=False,
                   mode=NotifyMode.INFO).send()
        pheno_df = None
    else:
        pheno_df = block.pheno_matrix.get_as_data_frame(sep_pheno)
        pheno_df.set_index(pheno_df.columns[0], inplace=True)
        # TODO: solve somehow better: Here we add empty column with user class assignment
        pheno_df[ExpressionSet(None, None).pheno_metadata["user_class_title"]] = ""

    m_rna_es = None
    mi_rna_es = None
    methyl_es = None

    if block.m_rna_matrix is not None:
        m_rna_assay_df = block.m_rna_matrix.get_as_data_frame(sep_m_rna)
        m_rna_es, m_rna_assay_df, gpl_file = process_data_frame(
            exp, block, m_rna_assay_df, block.m_rna_matrix_ori,
            block.m_rna_platform, block.m_rna_unit, "m_rna")
        block.m_rna_gpl_file = gpl_file
        if pheno_df is not None:
            m_rna_es.store_pheno_data_frame(pheno_df)
        m_rna_es.working_unit = block.m_rna_unit

    if block.mi_rna_matrix is not None:
        mi_rna_assay_df = block.mi_rna_matrix.get_as_data_frame(sep_mi_rna)
        mi_rna_es, mi_rna_assay_df, gpl_file = process_data_frame(
            exp, block, mi_rna_assay_df, block.mi_rna_matrix_ori,
            block.mi_rna_platform, block.mi_rna_unit, "mi_rna")
        block.mi_rna_gpl_file = gpl_file
        if pheno_df is not None:
            mi_rna_es.store_pheno_data_frame(pheno_df)
        mi_rna_es.working_unit = block.mi_rna_unit

    if block.methyl_matrix is not None:
        methyl_assay_df = block.methyl_matrix.get_as_data_frame(sep_methyl)
        # BUG FIX: previously "methyl" was passed as the `unit` argument and
        # data_type was left at its "m_rna" default. Pass the block's methyl
        # unit (if any) and "methyl" as the data type.
        methyl_es, methyl_assay_df, gpl_file = process_data_frame(
            exp, block, methyl_assay_df, block.methyl_matrix_ori,
            block.methyl_platform, getattr(block, "methyl_unit", None), "methyl")
        block.methyl_gpl_file = gpl_file
        if pheno_df is not None:
            methyl_es.store_pheno_data_frame(pheno_df)
        # methyl_es.working_unit = block.methyl_unit

    AllUpdated(exp.pk,
               comment=u"Finished processing of UserUploadComplex",
               silent=False,
               mode=NotifyMode.INFO).send()
    return [m_rna_es, mi_rna_es, methyl_es], {}
def pattern_search(
        exp, block,
        m_rna_es,
        mi_rna_es,
        gene2gene,
        miRNA2gene,
        # gene_platform,
        # miRNA_platform,
        radius,
        min_imp,
        number_of_genes,
        metric,
        base_filename):
    """Search the gene interaction network for differential expression patterns.

    Builds the gene-gene network for the mRNA platform, z-scores the
    expression data, runs DifferentialPatternSearcher from randomly chosen
    seed genes, and stores the discovered patterns as GeneSets.

    @type m_rna_es: ExpressionSet
    @type mi_rna_es: ExpressionSet
    @type gene2gene: BinaryInteraction
    @type miRNA2gene: BinaryInteraction
    @type radius: int
    @type min_imp: double

    NOTE(review): the `metric` parameter is currently unused -- the commented
    `metric=metric` below suggests it was meant for the searcher; confirm.
    Returns ([GeneSets], {}).
    """
    AllUpdated(exp.pk,
               comment=u"Initializing data...",
               silent=False,
               mode=NotifyMode.INFO).send()
    exp.log(block.uuid, "Initializing data...")
    mData = m_rna_es.get_assay_data_frame()
    gene_platform = list(mData.columns)

    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix",
               silent=False,
               mode=NotifyMode.INFO).send()
    gene2gene = gene2gene.get_matrix_for_platform(
        exp, gene_platform, symmetrize=True, identifiers=False)
    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix done",
               silent=False,
               mode=NotifyMode.INFO).send()
    # TODO fix pattern search
    # if miRNA2gene is not None:
    #     miRNA2gene = miRNA2gene.load_matrix().T
    #     miRNA2gene = sp.coo_matrix(miRNA2gene.values)
    # if mi_rna_es is not None:
    #     miData = mi_rna_es.get_assay_data_frame()
    #     mir2gene = miRNA2gene
    #     mir2gene = sp.coo_matrix(mir2gene.values).T
    #     nw = mergeNetworks(gene2gene, mir2gene)
    # else:
    #     gene2gene = gene2gene.load_matrix()
    #     nw = sparse_df_to_saprse_matrix(gene2gene)

    # CSR form is what the searcher iterates (nw.indices below).
    nw = gene2gene.tocsr()
    data = mData
    data.set_index(data.columns[0], inplace=True, drop=True)
    # Normalize expression per feature before pattern scoring.
    data = zscore(data)
    pheno = m_rna_es.get_pheno_data_frame()
    classes = pheno['User_class'].values
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)
    exp.log(block.uuid, "Data ready. Running Pattern Search")

    # Random seed genes drawn from nodes that actually have interactions.
    seeds = np.random.choice(np.unique(nw.indices), number_of_genes, replace=False)
    # Searcher initialization (original note: metric=metric was intended here).
    searcher = DifferentialPatternSearcher(nw,
                                           radius=radius,
                                           min_improve=min_imp,
                                           seeds=seeds,
                                           base_dir="orig_interactions/",
                                           verbose=True)
    # Run the search itself.
    res = searcher.search(data, classes)
    exp.log(block.uuid, "Pattern search finished.")
    # `res` is a list of patterns; map gene indices back to platform identifiers.
    comodule_set = map(
        lambda pattern: [gene_platform[gene] for gene in pattern.genes], res)
    gene_sets = GeneSets(exp.get_data_folder(), "%s_ps_gene_sets" % str(block.uuid))
    result = {key: value for key, value in enumerate(comodule_set)}
    gs = GS(result, result)
    gene_sets.store_gs(gs)
    return [gene_sets], {}
def get_matrix_for_platform(self, exp, gene_list, mirna_list=None, symmetrize=True, identifiers=True, tolower=False):
    """Build an interaction matrix restricted to the given platform(s).

    Translates stored interaction pairs to RefSeq identifiers and places them
    into a sparse matrix whose rows/columns follow ``gene_list`` (gene-gene
    mode) or ``mirna_list`` x ``gene_list`` (miRNA-gene mode).

    :param exp: experiment used only for progress notifications (may be None).
    :param gene_list: identifiers defining the gene axis/axes.
    :param mirna_list: when given, rows are miRNAs and columns genes.
    :param symmetrize: add the transpose and renormalize values to 1.
    :param identifiers: when True return a labeled pd.SparseDataFrame,
        otherwise a scipy sparse matrix.
    :param tolower: lower-case identifiers before the hash lookup.
    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)
    from collections import defaultdict
    from wrappers.input.utils import find_refseqs
    log.debug(gene_list)
    if mirna_list:
        log.debug(mirna_list)
    # RefSeq-like accessions, e.g. NM_000014.5 -- two letters, underscore, id.
    regex = "^[A-Z][A-Z]_[a-zA-Z0-9.]*"
    # Heuristic: when fewer than half the gene identifiers look like RefSeq,
    # translate the whole list (keeping originals that have no RefSeq match).
    if len(
            filter(
                lambda x: x is not None,
                map(lambda x: re.match(regex, str(x), re.IGNORECASE),
                    gene_list))) < (len(gene_list) * 0.5):
        new_g = []
        for gene in gene_list:
            rf = list(find_refseqs(gene))
            if len(rf) > 0:
                new_g.append(rf[0])
            if len(rf) == 0:
                new_g.append(gene)
        gene_list = new_g
    # Identifier -> matrix position lookups.
    hasht = dict(zip(gene_list, range(len(gene_list))))
    mirna_hasht = dict()
    if mirna_list is not None:
        new_g = []
        for gene in mirna_list:
            rf = list(find_refseqs(gene))
            if len(rf) > 0:
                new_g.append(rf[0])
            else:
                new_g.append(gene)
        mirna_list = new_g
        mirna_hasht = dict(zip(mirna_list, range(len(mirna_list))))
    # Group interaction partners by source identifier. In miRNA mode the
    # miRNA side becomes the key (which column holds it depends on x2_unit).
    inter_hash = defaultdict(list)
    interactons = self.load_pairs()
    cols = []
    rows = []
    log.debug("transforming interactions")
    for ix in range(len(interactons)):
        a, b, val = interactons.iloc[ix]
        if mirna_list is not None:
            if self.x2_unit == 'mirbase':
                inter_hash[b].append([a, val])
            else:
                inter_hash[a].append([b, val])
        else:
            inter_hash[a].append([b, val])
    if exp:
        AllUpdated(exp.pk,
                   comment=u"Transforming interaction matrix done",
                   silent=False,
                   mode=NotifyMode.INFO).send()
    log.debug("transformation of interactions done")
    count = 0
    counter2 = 0
    counter3 = 0
    counter4 = 0
    size_hash = len(inter_hash)
    if mirna_list is None:
        # Gene-gene mode: both endpoints must map into `hasht`.
        for key, value in inter_hash.iteritems():
            count += 1
            if count % 500 == 0:
                log.debug("translating gene %d", count)
                if exp:
                    AllUpdated(exp.pk,
                               comment=u"Translating gene %s of %s" % (count, size_hash),
                               silent=False,
                               mode=NotifyMode.INFO).send()
            refseqs = find_refseqs(key)
            for refseq in refseqs:
                counter2 += 1
                if refseq not in hasht:
                    continue
                if refseq in hasht:
                    for (gene, strength) in value:
                        for new_refseq in find_refseqs(gene):
                            counter3 += 1
                            gi = refseq
                            gj = new_refseq
                            if gj not in hasht:
                                continue
                            counter4 += 1
                            # NOTE(review): `val = strength` is assigned but
                            # unused -- the matrix below is filled with ones.
                            val = strength
                            if tolower:
                                gi = gi.lower()
                                gj = gj.lower()
                            cols.append(hasht[gi])
                            rows.append(hasht[gj])
    else:
        # miRNA-gene mode: source maps into `mirna_hasht` (rows), target
        # into `hasht` (columns).
        for key, value in inter_hash.iteritems():
            count += 1
            if count % 500 == 0:
                log.debug("translating miRNA %d", count)
                if exp:
                    AllUpdated(exp.pk,
                               comment=u"Translating miRNA %s of %s" % (count, size_hash),
                               silent=False,
                               mode=NotifyMode.INFO).send()
            refseqs = find_refseqs(key)
            for refseq in refseqs:
                counter2 += 1
                if refseq not in mirna_hasht:
                    continue
                if refseq in mirna_hasht:
                    for (gene, strength) in value:
                        for new_refseq in find_refseqs(gene):
                            counter3 += 1
                            gi = refseq
                            gj = new_refseq
                            if gj not in hasht:
                                continue
                            counter4 += 1
                            val = strength
                            if tolower:
                                gi = gi.lower()
                                gj = gj.lower()
                            rows.append(mirna_hasht[gi])
                            cols.append(hasht[gj])
    if exp:
        AllUpdated(exp.pk,
                   comment=u"%d interactions were found." % len(cols),
                   silent=False,
                   mode=NotifyMode.INFO).send()
    inters_matr = None
    # TODO fix for custom value of interactions
    if mirna_list is None:
        inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                    (len(gene_list), len(gene_list)))
    else:
        inters_matr = sp.coo_matrix((np.ones(len(cols)), (rows, cols)),
                                    (len(mirna_list), len(gene_list)))
    if symmetrize:
        # Add the transpose, then renormalize every stored value back to 1.
        inters_matr = inters_matr + inters_matr.T
        inters_matr.data /= inters_matr.data
    if identifiers:
        # Labeled output: wrap rows into a SparseDataFrame and attach
        # the platform identifiers as index/columns.
        inters_matr = inters_matr.tocsr()
        sparse_df = pd.SparseDataFrame([
            pd.SparseSeries(inters_matr[i].toarray().ravel())
            for i in np.arange(inters_matr.shape[0])
        ])
        if mirna_list is None:
            index = gene_list[:sparse_df.shape[0]]
            columns = gene_list[:sparse_df.shape[1]]
        else:
            index = mirna_list[:sparse_df.shape[0]]
            columns = gene_list[:sparse_df.shape[1]]
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)
        sparse_df.set_index([index], inplace=True)
        sparse_df.columns = columns
        return sparse_df
    return inters_matr
def filter_by_bi(exp, block, m_rna_es, mi_rna_es, interaction_matrix, base_filename):
    """Filter mRNA and miRNA expression sets to features covered by interactions.

    Builds the interaction matrix for the current platforms and keeps only
    the mRNA/miRNA columns that appear in it, cloning both expression sets.
    Phenotype data is copied when available.

    Returns ([m_rna_result, mi_rna_result], {}).
    """
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    # When the interaction pairs are RefSeq-keyed, convert the mRNA assay
    # frame to the same identifier space first.
    inter_units = None
    if interaction_matrix.x1_unit == 'RefSeq':
        inter_units = interaction_matrix.load_pairs().iloc[:, 0].tolist()
    if inter_units:
        m_rna_df = m_rna_es.get_assay_data_frame_for_platform(exp, inter_units)
    else:
        m_rna_df = m_rna_es.get_assay_data_frame()
    mi_rna_df = mi_rna_es.get_assay_data_frame()
    gene_platform = list(m_rna_df.columns)
    # Explicit .columns (was `list(mi_rna_df)`, equivalent but obscure).
    mi_rna_platform = list(mi_rna_df.columns)

    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix",
               silent=False,
               mode=NotifyMode.INFO).send()
    targets_matrix = interaction_matrix.get_matrix_for_platform(
        exp, gene_platform, mi_rna_platform, symmetrize=False, identifiers=True)
    AllUpdated(exp.pk,
               comment=u"Transforming interaction matrix done",
               silent=False,
               mode=NotifyMode.INFO).send()

    targets_matrix.columns = m_rna_df.columns
    targets_matrix.index = mi_rna_df.columns

    # Keep only features present in both the expression data and the
    # interaction matrix.
    allowed_m_rna_index_set = set(targets_matrix.columns) & set(m_rna_df.columns)
    m_rna_df_filtered = m_rna_df.loc[:, allowed_m_rna_index_set]
    allowed_mi_rna_index_set = set(targets_matrix.index) & set(mi_rna_df.columns)
    mi_rna_df_filtered = mi_rna_df.loc[:, allowed_mi_rna_index_set]

    m_rna_result = m_rna_es.clone(base_filename + "_mRNA")
    m_rna_result.store_assay_data_frame(m_rna_df_filtered)
    try:
        m_rna_result.store_pheno_data_frame(m_rna_es.get_pheno_data_frame())
    except RuntimeError:
        # No `as re` -- the old name shadowed the module-level `re` module.
        exp.log(block.uuid, "Phenotype not set")
        log.debug("Phenotype not set")

    mi_rna_result = mi_rna_es.clone(base_filename + "_miRNA")
    mi_rna_result.store_assay_data_frame(mi_rna_df_filtered)
    try:
        mi_rna_result.store_pheno_data_frame(mi_rna_es.get_pheno_data_frame())
    except RuntimeError:
        exp.log(block.uuid, "Phenotype not set")
        log.debug("Phenotype not set")
    return [m_rna_result, mi_rna_result], {}