"--fo", help= "name output figure. Extension indiciates figure type. Data files saved with same name but different extension" ) #parser.add_argument("--stagesPlot") parser.add_argument("--vmin", type=float) parser.add_argument("--vmax", type=float) parser.add_argument("--linkage_regTargets", default="complete") parser.add_argument("--linkage_TFs", default="complete") args = parser.parse_args() ########################################################################################################################### ## LOAD DATA corrDF = IOutils.loadDF(args.geneCorr) genes = list( set().union(*[IOutils.readListFromFile(x) for x in args.genes.split(",")])) genes_missing = [x for x in genes if not (x in corrDF.index)] if len(genes_missing) > 0: print("WARNING the following genes are not in correlation data {}".format( genes_missing)) genes = [x for x in genes if x in corrDF.index] TFs = list( set().union(*[IOutils.readListFromFile(x) for x in args.TFs.split(",")])) TFs_missing = [x for x in TFs if not (x in corrDF.index)] if len(TFs_missing) > 0: print("WARNING the following TFs are not in correlation data {}".format( TFs_missing)) TFs = [x for x in TFs if x in corrDF.index] ############################################################################################################################# ## COMPUTE SIGNED ADJACENCY
# Each value of superstage_dict is a comma-separated string; replace it with
# the list of integers parsed from the trailing digits of every member.
superstage_dict = OrderedDict(
    (name, [
        int(re.search(r'(\d+)$', member).group(1))
        for member in members.split(",")
    ])
    for name, members in superstage_dict.items()
)

############################################################################################################
## Load data
cellData = IOutils.loadCellData(
    OrderedDict([
        ("expr", args.fi_expr),
        ("pcComps", args.fi_stageIDs),
    ]))

## Remove unnecessary columns
# Keep only the "expr" columns plus the ("rowData", "clust_ID") column.
cellData = pd.concat(
    [
        cellData.loc[:, ("expr", slice(None))],
        cellData.loc[:, ("rowData", "clust_ID")],
    ],
    axis=1,
)

# Restrict the rows to the cells listed in the args.cells file.
cells_allowed = IOutils.readListFromFile(args.cells)
print("Restricting to {:d} allowed cells".format(len(cells_allowed)))
cellData = cellData.loc[cells_allowed, :].copy()

############################################################################################################
## COMPUTE CORRELATION BY STAGE
if not args.noSingleStageCorr:
    # Distinct clust_ID values, ordered by their integer value.
    stages = sorted(
        cellData["rowData"].loc[:, "clust_ID"].unique(),
        key=int,
    )

    corrDict_stages = OrderedDict()
    for stage in stages:
        print("\tCalculating correlation for stage {}".format(stage))
        # Rows whose clust_ID equals the current stage.
        stage_mask = cellData["rowData"].loc[:, "clust_ID"] == stage
        exprDF = cellData["expr"].loc[stage_mask, :].copy()
        corrDict_stages[stage] = cf.pearsonCorrel_log10tpm(exprDF)

    ##Write results