Ejemplo n.º 1
0
def main():
    """
    Perform a correlation analysis of a Gene Expression Dataset and a Metabolomic Dataset.

    The options are obtained from ``getOptions()``; the attributes read from the returned
    object are listed below. Both wide datasets are loaded as tab-separated tables, handed to
    the packaged R script ``all_by_all_correlation.R`` and processed by its
    ``corr_main_func`` function.

    Arguments (attributes read from the parsed options):
        :param geneDataset metDataset: Gene expression/Metabolomics wide dataset, respectively.
        :type geneDataset metDataset: files

        :param geneId metId: Name of the Genes/metabolites unique identifier column, respectively.
        :type geneId metId: strings

        :param geneAnnot metAnnot: Gene Expression/Metabolomics Annotation Datasets, respectively.
        Optional: when not given, the dataset is indexed by its unique identifier column.
        :type geneAnnot metAnnot: files

        :param geneName metName: Name of the column of the Annotation file that contains
        genes/metabolites names respectively. Only read when the matching annotation is given.
        :type geneName metName: strings

        :param meth: Methodology for the correlation function. One of 'pearson', 'spearman' or
        'kendall'.
        :type meth: string

        :param thres: PValue Threshold to cut the correlations for the output table.
        :type thres: float

    Returns:
        This function returns None. The paths below are passed to the R function
        ``corr_main_func``, which is presumably the one that writes them -- confirm against the
        R script.

        :return output: Output table with the following information: Metabolite "\t" Gene "\t"
        Correlation "\t" pvalue
        :rtype output: file

        :return corMat: Correlation Matrix
        :rtype corMat: file

        :return fig: Network-like output figure
        :rtype fig: pdf
    """

    # Silence warnings of the RRuntimeWarning category for the whole run.
    warnings.filterwarnings("ignore", category=RRuntimeWarning)
    args = getOptions()
    logger = logging.getLogger()
    sl.setLogger(logger)
    logger.info(u"Importing data with the following parameters: "
                "\n\tGene Dataset:  {}"
                "\n\tGene UniqueID:  {}"
                "\n\tMet Dataset:{}"
                "\n\tMet UniqueID:  {}"
                "\n\tMethod:  {}"
                "\n\tThreshold:  {}".format(
                    args.geneDataset,
                    args.geneId,
                    args.metDataset,
                    args.metId,
                    args.meth,
                    args.thres,
                ))

    # NOTE(review): presumably validates that the ID columns hold unique values -- confirm
    # against modules.checkForDuplicates.
    modules.checkForDuplicates(args.geneDataset, args.geneId)
    modules.checkForDuplicates(args.metDataset, args.metId)
    pandas2ri.activate()

    # Read the packaged R script. The file handle is closed as soon as the text has been
    # read, and before the resource path context (which may be a temporary file) is released.
    with ires.path("gaitGM.data",
                   "all_by_all_correlation.R") as my_r_script_path:
        with open(my_r_script_path, "r") as f:
            rFile = f.read()
    allByAllCorrScript = STAP(rFile, "corr_main_func")

    # Prepare Gene Expression Data
    geneTable = pd.read_table(args.geneDataset, sep="\t", header=0)
    if args.geneAnnot:
        R_gene_df = modules.Ids2Names(geneTable, args.geneId, args.geneAnnot,
                                      args.geneName)
    else:
        # No annotation: use the unique identifier column as the row index.
        geneTable = geneTable.set_index(args.geneId)
        R_gene_df = pandas2ri.py2rpy(geneTable)

    # Prepare Metabolomics Data
    metTable = pd.read_table(args.metDataset, sep="\t", header=0)
    if args.metAnnot:
        R_met_df = modules.Ids2Names(metTable, args.metId, args.metAnnot,
                                     args.metName)
    else:
        # No annotation: use the unique identifier column as the row index.
        metTable = metTable.set_index(args.metId)
        R_met_df = pandas2ri.py2rpy(metTable)

    # Run the all-by-all correlation in R with the requested method, threshold and
    # output locations.
    allByAllCorrScript.corr_main_func(
        x=R_gene_df,
        y=R_met_df,
        meth=args.meth,
        thres=args.thres,
        corrMatPath=args.corMat,
        outputPath=args.output,
        figurePath=args.fig,
    )