def process_upload(self, exp, *args, **kwargs):
    """
        Build an ExpressionSet from the uploaded assay and phenotype CSV files.

        Reads both files, makes sure the phenotype table has the user-class
        column, transposes the assay table when ``self.es_matrix_ori`` is
        "GxS", stores both tables in a new ExpressionSet, publishes it as
        the ``expression_set`` output variable, calls ``exp.store_block`` and
        fires the ``success`` action.

        @param exp: Experiment
    """
    self.clean_errors()

    # ``DataFrame.from_csv`` is deprecated (and removed in pandas 1.0).
    # ``read_csv(..., index_col=0, parse_dates=True)`` is the documented
    # equivalent: the first CSV column becomes the index.
    assay_df = pd.read_csv(
        self.es_matrix.get_file(), index_col=0, parse_dates=True)

    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_annotation" % self.uuid)

    pheno_df = pd.read_csv(
        self.pheno_matrix.get_file(), index_col=0, parse_dates=True)
    # The previous ``pheno_df.set_index(pheno_df.columns[0])`` discarded its
    # result (set_index is not in-place by default), so it never changed the
    # frame. The first CSV column is already the index, so it is dropped.

    # Add an empty user-class column unless the upload already provides one.
    user_class_title = es.pheno_metadata["user_class_title"]
    if user_class_title not in pheno_df.columns:
        pheno_df[user_class_title] = ""

    # If the matrix was uploaded in "GxS" orientation, transpose it.
    if self.es_matrix_ori == "GxS":
        assay_df = assay_df.T

    es.store_assay_data_frame(assay_df)
    es.store_pheno_data_frame(pheno_df)

    if self.working_unit:
        es.working_unit = self.working_unit

    self.set_out_var("expression_set", es)

    exp.store_block(self)

    self.do_action("success", exp)
def process_upload(self, exp, *args, **kwargs):
    """
        Build an ExpressionSet from the uploaded assay and phenotype CSV files.

        Reads both files, makes sure the phenotype table has the user-class
        column, stores both tables in a new ExpressionSet, publishes it as
        the ``expression_set`` output variable, calls ``exp.store_block`` and
        fires the ``success`` action.

        @param exp: Experiment
    """
    self.clean_errors()

    # ``DataFrame.from_csv`` is deprecated (and removed in pandas 1.0).
    # ``read_csv`` with ``index_col=0, parse_dates=True`` reproduces its
    # defaults: the first CSV column is used as the index.
    assay_df = pd.read_csv(
        self.es_matrix.get_file(), index_col=0, parse_dates=True)

    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_annotation" % self.uuid)

    pheno_df = pd.read_csv(
        self.pheno_matrix.get_file(), index_col=0, parse_dates=True)
    # A former ``pheno_df.set_index(pheno_df.columns[0])`` call was removed:
    # its return value was thrown away, so it had no effect on ``pheno_df``,
    # and the first CSV column already serves as the index.

    # Guarantee the user-class column exists; keep uploaded values if present.
    user_class_title = es.pheno_metadata["user_class_title"]
    if user_class_title not in pheno_df.columns:
        pheno_df[user_class_title] = ""

    es.store_assay_data_frame(assay_df)
    es.store_pheno_data_frame(pheno_df)

    if self.working_unit:
        es.working_unit = self.working_unit

    self.set_out_var("expression_set", es)

    exp.store_block(self)

    self.do_action("success", exp)
def process_upload(self, exp, *args, **kwargs):
    """
        Turn the uploaded matrices into ExpressionSets and publish them.

        For each of the mRNA, miRNA and methylation matrices that is not
        None, an ExpressionSet is created, filled with the assay frame and
        the shared phenotype frame, and exposed as an output variable.
        Fires ``success`` when everything went through, ``error`` otherwise.

        @param exp: Experiment
    """
    # TODO: move to celery
    self.clean_errors()
    sep = getattr(self, "csv_sep", " ")

    # (matrix attribute, file name template, output variable, set m_rna unit?)
    # TODO: fetch GPL annotation if GPL id was provided (mRNA dataset)
    datasets = (
        ("m_rna_matrix", "%s_m_rna_es", "m_rna_es", True),
        ("mi_rna_matrix", "%s_mi_rna_es", "mi_rna_es", False),
        ("methyl_matrix", "%s_methyl_es", "methyl_es", False),
    )

    try:
        pheno_df = None
        if self.pheno_matrix:
            pheno_df = self.pheno_matrix.get_as_data_frame(sep)
            # NOTE(review): the result of set_index is discarded, so
            # pheno_df keeps its current index (pandas' set_index returns a
            # new frame unless inplace=True). Confirm what
            # get_as_data_frame returns before changing this.
            pheno_df.set_index(pheno_df.columns[0])

            # TODO: solve somehow better: Here we add empty column with user class assignment
            pheno_df[ExpressionSet(None, None).pheno_metadata["user_class_title"]] = ""
        else:
            self.warnings.append(Exception("Phenotype is undefined"))

        for matrix_attr, filename_tpl, out_var, has_unit in datasets:
            matrix = getattr(self, matrix_attr)
            if matrix is None:
                continue

            assay_df = matrix.get_as_data_frame(sep)
            es = ExpressionSet(base_dir=exp.get_data_folder(),
                               base_filename=filename_tpl % self.uuid)
            es.store_assay_data_frame(assay_df)
            # NOTE(review): pheno_df is None here when no phenotype was
            # uploaded -- confirm store_pheno_data_frame accepts that.
            es.store_pheno_data_frame(pheno_df)
            if has_unit:
                es.working_unit = self.m_rna_unit
            self.set_out_var(out_var, es)

        self.do_action("success", exp)
    except Exception as e:
        # Print the traceback of the failure, then report it to the block.
        traceback.print_tb(sys.exc_info()[2])
        self.do_action("error", exp, e)
def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    """
        Prepare an uploaded data frame and store it as an ExpressionSet.

        The first column of ``df`` becomes its index (``df`` is modified in
        place), the frame is transposed when ``ori`` is "GxS" and, when
        ``platform`` is given, the platform file is fetched with
        ``fetch_geo_gpl`` and the frame is passed through ``convert_ids``.
        In that case the working unit is forced to 'RefSeq'.

        @param exp: experiment; its ``pk`` and ``get_data_folder()`` are used
        @param block: block whose ``uuid`` names the stored file
        @param df: data frame to process; its index is set in place
        @param ori: orientation flag; "GxS" triggers a transposition
        @param platform: platform id for ``fetch_geo_gpl``; falsy to skip
        @param unit: working unit; ignored when ``platform`` is given
        @param data_type: dataset kind, used in the file name and messages
        @return: tuple ``(es, df, gpl_file)``; ``gpl_file`` is None when no
            platform was given
    """
    if settings.CELERY_DEBUG:
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Append only once: this function runs per dataset, and an
        # unconditional append added a duplicate sys.path entry on each call.
        if debug_egg not in sys.path:
            sys.path.append(debug_egg)
        # Must come after the path tweak: pydevd is loaded from the egg.
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    # NOTE: in-place on purpose (kept for callers that hold on to ``df``).
    df.set_index(df.columns[0], inplace=True)

    # If the matrix was uploaded in "GxS" orientation, transpose it.
    if ori == "GxS":
        df = df.T

    gpl_file = None
    if platform:
        AllUpdated(
            exp.pk,
            comment=u"Fetching platform %s" % platform,
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        gpl_file = fetch_geo_gpl(exp, block, platform)
        df, matched = convert_ids(gpl_file, df, data_type)
        AllUpdated(
            exp.pk,
            comment=u"Matched %s features for %s dataset" % (matched, data_type),
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        # After id conversion the caller-supplied unit no longer applies.
        unit = 'RefSeq'

    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_%s_es" % (block.uuid, data_type))
    es.working_unit = unit
    es.store_assay_data_frame(df)
    return es, df, gpl_file
def _attach_remote_debugger():
    """Connect this process to the PyCharm remote debugger on localhost:6901."""
    import sys
    egg_path = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
    # Guard against duplicates: the caller runs once per dataset and used to
    # append the same entry to sys.path on every call.
    if egg_path not in sys.path:
        sys.path.append(egg_path)
    # Imported only after the path tweak because pydevd lives inside the egg.
    import pydevd
    pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)


def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    """
        Prepare an uploaded data frame and store it as an ExpressionSet.

        Steps: index ``df`` by its first column (in place), transpose it when
        ``ori`` is "GxS", optionally fetch the platform file and run
        ``convert_ids`` (which also switches the working unit to 'RefSeq'),
        then store the frame in a new ExpressionSet.

        @param exp: experiment; its ``pk`` and ``get_data_folder()`` are used
        @param block: block whose ``uuid`` names the stored file
        @param df: data frame to process; its index is set in place
        @param ori: orientation flag; "GxS" triggers a transposition
        @param platform: platform id for ``fetch_geo_gpl``; falsy to skip
        @param unit: working unit; ignored when ``platform`` is given
        @param data_type: dataset kind, used in the file name and messages
        @return: tuple ``(es, df, gpl_file)``; ``gpl_file`` is None when no
            platform was given
    """
    if settings.CELERY_DEBUG:
        _attach_remote_debugger()

    # NOTE: mutates the caller's frame; kept in place for compatibility.
    df.set_index(df.columns[0], inplace=True)

    # If the matrix was uploaded in "GxS" orientation, transpose it.
    if ori == "GxS":
        df = df.T

    gpl_file = None
    if platform:
        AllUpdated(
            exp.pk,
            comment=u"Fetching platform %s" % platform,
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        gpl_file = fetch_geo_gpl(exp, block, platform)
        df, matched = convert_ids(gpl_file, df, data_type)
        AllUpdated(
            exp.pk,
            comment=u"Matched %s features for %s dataset" % (matched, data_type),
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        # Ids were converted, so the caller-supplied unit is overridden.
        unit = 'RefSeq'

    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_%s_es" % (block.uuid, data_type))
    es.working_unit = unit
    es.store_assay_data_frame(df)
    return es, df, gpl_file
def process_upload(self, exp, *args, **kwargs):
    """
        Create and publish one ExpressionSet per uploaded matrix.

        The phenotype frame (if any) is shared by the mRNA, miRNA and
        methylation datasets. Each matrix that is not None is stored in its
        own ExpressionSet and exposed as an output variable. The block fires
        ``success`` at the end, or ``error`` if anything raised.

        @param exp: Experiment
    """
    def build_es(matrix, filename_template, pheno):
        # Shared steps: load the assay frame, create the set, store frames.
        frame = matrix.get_as_data_frame(sep)
        new_es = ExpressionSet(base_dir=exp.get_data_folder(),
                               base_filename=filename_template % self.uuid)
        new_es.store_assay_data_frame(frame)
        # NOTE(review): ``pheno`` is None when no phenotype was uploaded --
        # confirm store_pheno_data_frame is fine with that.
        new_es.store_pheno_data_frame(pheno)
        return new_es

    # TODO: move to celery
    self.clean_errors()
    sep = getattr(self, "csv_sep", " ")

    try:
        if self.pheno_matrix:
            pheno_df = self.pheno_matrix.get_as_data_frame(sep)
            # NOTE(review): set_index returns a new frame by default and the
            # result is dropped here, so pheno_df is left untouched. Check
            # what get_as_data_frame returns before "fixing" this.
            pheno_df.set_index(pheno_df.columns[0])

            # TODO: solve somehow better: Here we add empty column with user class assignment
            user_class_title = ExpressionSet(None, None).pheno_metadata["user_class_title"]
            pheno_df[user_class_title] = ""
        else:
            self.warnings.append(Exception("Phenotype is undefined"))
            pheno_df = None

        if self.m_rna_matrix is not None:
            m_rna_es = build_es(self.m_rna_matrix, "%s_m_rna_es", pheno_df)
            m_rna_es.working_unit = self.m_rna_unit
            self.set_out_var("m_rna_es", m_rna_es)
            # TODO: fetch GPL annotation if GPL id was provided

        if self.mi_rna_matrix is not None:
            self.set_out_var(
                "mi_rna_es",
                build_es(self.mi_rna_matrix, "%s_mi_rna_es", pheno_df))

        if self.methyl_matrix is not None:
            self.set_out_var(
                "methyl_es",
                build_es(self.methyl_matrix, "%s_methyl_es", pheno_df))

        self.do_action("success", exp)
    except Exception as e:
        # Dump the traceback, then hand the exception to the error action.
        tb = sys.exc_info()[2]
        traceback.print_tb(tb)
        self.do_action("error", exp, e)