Ejemplo n.º 1
0
def main(args):
    """Run the ``lassoEN`` routine from ``lasso_enet.R`` and save its tables.

    The R script is read from the directory that contains this file and is
    wrapped with ``STAP``. Data are loaded through ``wideToDesign``; design
    rows whose group value equals the string "nan" are removed (and the same
    index labels are dropped from the transposed data). Every pairwise
    combination of the groups that have more than two design rows is then
    passed to R together with the transposed data and the design.

    Args:
        args: Parsed options. The attributes read here are ``input``,
            ``design``, ``uniqID``, ``group``, ``alpha``, ``plots``,
            ``coefficients`` and ``flags``.

    Side effects:
        Writes the first element returned by ``lassoEN`` to
        ``args.coefficients`` and the second to ``args.flags``, both as
        tab-separated tables with a header and without row names or quoting.
        ``dat.design`` and ``dat.trans`` are modified in place.
    """
    # --- R setup ---------------------------------------------------------
    # The LASSO script is expected to live next to this file.
    script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    r_script_path = os.path.join(script_dir, "lasso_enet.R")
    logger.info(r_script_path)

    # Activate the pandas2ri conversion layer.
    pandas2ri.activate()

    # Load the R source and expose it as a callable package.
    with open(r_script_path, 'r') as handle:
        r_source = handle.read()
    lasso_enet = STAP(r_source, "lasso_enet")

    # --- Data import -----------------------------------------------------
    dat = wideToDesign(args.input,
                       args.design,
                       args.uniqID,
                       group=args.group,
                       logger=logger)

    # Remove missing data before doing anything else.
    dat.dropMissing()

    # Work on the transposed table from here on.
    dat.trans = dat.transpose()
    dat.trans.columns.name = ""

    # Split the design into rows whose group is "nan" and the rest, then
    # drop the "nan" rows from the transposed data as well.
    group_is_nan = dat.design[dat.group] == "nan"
    nan_rows = dat.design[group_is_nan]
    dat.design = dat.design[~group_is_nan]
    dat.trans.drop(nan_rows.index.values, axis=0, inplace=True)

    logger.info("{0} removed from analysis".format(nan_rows.index.values))

    # The group column is renamed to the fixed name "group" in both tables.
    group_rename = {dat.group: "group"}
    dat.design.rename(columns=group_rename, inplace=True)
    dat.trans.rename(columns=group_rename, inplace=True)

    # --- Group selection -------------------------------------------------
    # Keep only the groups that have more than two rows in the design.
    eligible_groups = []
    for label, members in dat.design.groupby("group"):
        if len(members.index) > 2:
            eligible_groups.append(label)

    # All pairwise combinations of the retained groups.
    group_pairs = np.array(list(it.combinations(eligible_groups, 2)))
    n_pairs = len(group_pairs)

    # --- Run R -----------------------------------------------------------
    column_names = np.array(dat.trans.columns.values.tolist())
    results = lasso_enet.lassoEN(dat.trans, dat.design, args.uniqID,
                                 column_names, group_pairs,
                                 n_pairs, args.alpha, args.plots)

    # Both outputs are written with the same write.table settings.
    table_options = dict(sep='\t',
                         quote=False,
                         row_names=False,
                         col_names=True)
    robjects.r['write.table'](results[0],
                              file=args.coefficients,
                              **table_options)
    robjects.r['write.table'](results[1],
                              file=args.flags,
                              **table_options)

    # Finishing
    logger.info("Script Complete!")