Ejemplo n.º 1
0
    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment
        """
        self.clean_errors()

        assay_df = pd.DataFrame.from_csv(self.es_matrix.get_file())

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        pheno_df = pd.DataFrame.from_csv(self.pheno_matrix.get_file())
        pheno_df.set_index(pheno_df.columns[0])

        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            pheno_df[es.pheno_metadata["user_class_title"]] = ""

        # if matrix is bad oriented, then do transposition
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)

        self.do_action("success", exp)
Ejemplo n.º 2
0
    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment
        """
        self.clean_errors()

        assay_df = pd.DataFrame.from_csv(self.es_matrix.get_file())

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        pheno_df = pd.DataFrame.from_csv(self.pheno_matrix.get_file())
        pheno_df.set_index(pheno_df.columns[0])

        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            pheno_df[es.pheno_metadata["user_class_title"]] = ""

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)

        self.do_action("success", exp)
Ejemplo n.º 3
0
    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment
        """
        # TODO: move to celery
        self.clean_errors()
        sep = getattr(self, "csv_sep", " ")

        try:
            if not self.pheno_matrix:
                self.warnings.append(Exception("Phenotype is undefined"))
                pheno_df = None
            else:
                pheno_df = self.pheno_matrix.get_as_data_frame(sep)
                pheno_df.set_index(pheno_df.columns[0])

                # TODO: solve somehow better: Here we add empty column with user class assignment
                pheno_df[ExpressionSet(None, None).pheno_metadata["user_class_title"]] = ""

            if self.m_rna_matrix is not None:
                m_rna_assay_df = self.m_rna_matrix.get_as_data_frame(sep)

                m_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                        base_filename="%s_m_rna_es" % self.uuid)
                m_rna_es.store_assay_data_frame(m_rna_assay_df)
                m_rna_es.store_pheno_data_frame(pheno_df)
                m_rna_es.working_unit = self.m_rna_unit

                self.set_out_var("m_rna_es", m_rna_es)

                # TODO: fetch GPL annotation if GPL id was provided

            if self.mi_rna_matrix is not None:
                mi_rna_assay_df = self.mi_rna_matrix.get_as_data_frame(sep)

                mi_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                        base_filename="%s_mi_rna_es" % self.uuid)
                mi_rna_es.store_assay_data_frame(mi_rna_assay_df)
                mi_rna_es.store_pheno_data_frame(pheno_df)

                self.set_out_var("mi_rna_es", mi_rna_es)

            if self.methyl_matrix is not None:

                methyl_assay_df = self.methyl_matrix.get_as_data_frame(sep)

                methyl_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                          base_filename="%s_methyl_es" % self.uuid)
                methyl_es.store_assay_data_frame(methyl_assay_df)
                methyl_es.store_pheno_data_frame(pheno_df)

                self.set_out_var("methyl_es", methyl_es)

            self.do_action("success", exp)
        except Exception as e:
            ex_type, ex, tb = sys.exc_info()
            traceback.print_tb(tb)
            self.do_action("error", exp, e)
Ejemplo n.º 4
0
def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append(
            '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        )
        import pydevd
        pydevd.settrace('localhost',
                        port=6901,
                        stdoutToServer=True,
                        stderrToServer=True)

    df.set_index(df.columns[0], inplace=True)
    # if matrix is bad oriented, then do transposition
    if ori == "GxS":
        df = df.T
        # df.columns = df.iloc[0]
        # df = df.drop(df.index[0])
    # if isinstance(df.columns[0][0], basestring):
    gpl_file = None
    if platform:
        AllUpdated(exp.pk,
                   comment=u"Fetching platform %s" % platform,
                   silent=False,
                   mode=NotifyMode.INFO).send()
        gpl_file = fetch_geo_gpl(exp, block, platform)
        df, matched = convert_ids(gpl_file, df, data_type)
        AllUpdated(exp.pk,
                   comment=u"Matched %s features for %s dataset" %
                   (matched, data_type),
                   silent=False,
                   mode=NotifyMode.INFO).send()
        unit = 'RefSeq'
    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_%s_es" % (block.uuid, data_type))
    es.working_unit = unit
    es.store_assay_data_frame(df)
    return es, df, gpl_file
Ejemplo n.º 5
0
def process_data_frame(exp, block, df, ori, platform, unit, data_type="m_rna"):
    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    df.set_index(df.columns[0], inplace=True)
    # if matrix is bad oriented, then do transposition
    if ori == "GxS":
        df = df.T
        # df.columns = df.iloc[0]
        # df = df.drop(df.index[0])
    # if isinstance(df.columns[0][0], basestring):


    gpl_file = None
    if platform:
        AllUpdated(
            exp.pk,
            comment=u"Fetching platform %s" % platform,
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        gpl_file = fetch_geo_gpl(exp, block, platform)
        df, matched = convert_ids(gpl_file, df, data_type)
        AllUpdated(
            exp.pk,
            comment=u"Matched %s features for %s dataset" % (matched, data_type),
            silent=False,
            mode=NotifyMode.INFO
        ).send()
        unit = 'RefSeq'
    es = ExpressionSet(base_dir=exp.get_data_folder(),
                       base_filename="%s_%s_es" % (block.uuid, data_type))
    es.working_unit = unit
    es.store_assay_data_frame(df)
    return es, df, gpl_file
Ejemplo n.º 6
0
    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment
        """
        # TODO: move to celery
        self.clean_errors()
        sep = getattr(self, "csv_sep", " ")

        try:
            if not self.pheno_matrix:
                self.warnings.append(Exception("Phenotype is undefined"))
                pheno_df = None
            else:
                pheno_df = self.pheno_matrix.get_as_data_frame(sep)
                pheno_df.set_index(pheno_df.columns[0])

                # TODO: solve somehow better: Here we add empty column with user class assignment
                pheno_df[ExpressionSet(
                    None, None).pheno_metadata["user_class_title"]] = ""

            if self.m_rna_matrix is not None:
                m_rna_assay_df = self.m_rna_matrix.get_as_data_frame(sep)

                m_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                         base_filename="%s_m_rna_es" %
                                         self.uuid)
                m_rna_es.store_assay_data_frame(m_rna_assay_df)
                m_rna_es.store_pheno_data_frame(pheno_df)
                m_rna_es.working_unit = self.m_rna_unit

                self.set_out_var("m_rna_es", m_rna_es)

                # TODO: fetch GPL annotation if GPL id was provided

            if self.mi_rna_matrix is not None:
                mi_rna_assay_df = self.mi_rna_matrix.get_as_data_frame(sep)

                mi_rna_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                          base_filename="%s_mi_rna_es" %
                                          self.uuid)
                mi_rna_es.store_assay_data_frame(mi_rna_assay_df)
                mi_rna_es.store_pheno_data_frame(pheno_df)

                self.set_out_var("mi_rna_es", mi_rna_es)

            if self.methyl_matrix is not None:

                methyl_assay_df = self.methyl_matrix.get_as_data_frame(sep)

                methyl_es = ExpressionSet(base_dir=exp.get_data_folder(),
                                          base_filename="%s_methyl_es" %
                                          self.uuid)
                methyl_es.store_assay_data_frame(methyl_assay_df)
                methyl_es.store_pheno_data_frame(pheno_df)

                self.set_out_var("methyl_es", methyl_es)

            self.do_action("success", exp)
        except Exception as e:
            ex_type, ex, tb = sys.exc_info()
            traceback.print_tb(tb)
            self.do_action("error", exp, e)