def main():
    """
    Perform a correlation analysis of a Gene Expression Dataset and a
    Metabolomic Dataset.

    The function takes no arguments; everything is read from the parsed
    command-line options returned by ``getOptions()``.

    Options read from ``args``:
        :param geneDataset metDataset: Gene expression/Metabolomics wide
            dataset, respectively (tab-separated, first row is the header).
        :type geneDataset metDataset: files

        :param geneId metId: Name of the Genes/metabolites unique identifier
            column, respectively.
        :type geneId metId: strings

        :param geneAnnot metAnnot: Gene Expression/Metabolomics Annotation
            Datasets, respectively. Optional: when not given, the dataset is
            simply indexed by its unique identifier column.
        :type geneAnnot metAnnot: files

        :param geneName metName: Name of the column of the Annotation file
            that contains genes/metabolites names respectively. Only used
            when the matching annotation dataset is given.
        :type geneName metName: strings

        :param meth: Methodology for the correlation function. One of
            'pearson', 'spearman' or 'kendall'.
        :type meth: string

        :param thres: PValue Threshold to cut the correlations for the
            output table.
        :type thres: float

    Outputs (paths taken from ``args`` and handed to the R function
    ``corr_main_func``):
        :param output: Output table with the following information:
            Metabolite "\t" Gene "\t" Correlation "\t" pvalue
        :type output: file

        :param corMat: Correlation Matrix
        :type corMat: file

        :param fig: Network-like output figure
        :type fig: pdf

    Returns:
        None. Results are passed to the R script via the paths above.
    """
    warnings.filterwarnings("ignore", category=RRuntimeWarning)
    args = getOptions()
    logger = logging.getLogger()
    sl.setLogger(logger)
    logger.info(
        u"Importing data with the following parameters: "
        "\n\tGene Dataset:  {}"
        "\n\tGene UniqueID:  {}"
        "\n\tMet Dataset:{}"
        "\n\tMet UniqueID:  {}"
        "\n\tMethod:  {}"
        "\n\tThreshold:  {}".format(
            args.geneDataset,
            args.geneId,
            args.metDataset,
            args.metId,
            args.meth,
            args.thres,
        )
    )

    # Check both inputs for duplicated identifiers before any heavy work.
    modules.checkForDuplicates(args.geneDataset, args.geneId)
    modules.checkForDuplicates(args.metDataset, args.metId)

    pandas2ri.activate()

    # Load the packaged R script. The file is read through a context manager
    # so the handle is closed even if reading fails (the previous code left
    # it open).
    with ires.path("gaitGM.data", "all_by_all_correlation.R") as my_r_script_path:
        with open(my_r_script_path, "r") as f:
            rFile = f.read()
    allByAllCorrScript = STAP(rFile, "corr_main_func")

    # Prepare Gene Expression Data
    geneTable = pd.read_table(args.geneDataset, sep="\t", header=0)
    if args.geneAnnot:
        # NOTE(review): Ids2Names is presumed to return an R-compatible data
        # frame with identifiers replaced by names -- confirm in `modules`.
        R_gene_df = modules.Ids2Names(
            geneTable, args.geneId, args.geneAnnot, args.geneName
        )
    else:
        geneTable = geneTable.set_index(args.geneId)
        R_gene_df = pandas2ri.py2rpy(geneTable)

    # Prepare Metabolomics Data
    metTable = pd.read_table(args.metDataset, sep="\t", header=0)
    if args.metAnnot:
        R_met_df = modules.Ids2Names(
            metTable, args.metId, args.metAnnot, args.metName
        )
    else:
        metTable = metTable.set_index(args.metId)
        R_met_df = pandas2ri.py2rpy(metTable)

    # Hand both tables and the output paths to the R function.
    allByAllCorrScript.corr_main_func(
        x=R_gene_df,
        y=R_met_df,
        meth=args.meth,
        thres=args.thres,
        corrMatPath=args.corMat,
        outputPath=args.output,
        figurePath=args.fig,
    )