def main(args):
    """Merge several flag files into a single wide flag table.

    Each file in ``args.flagFiles`` is read as a tab-separated table,
    optionally indexed by ``args.flagUniqID``, optionally renamed using
    ``args.filename`` (Galaxy mode), merged with ``Flags.merge``, and
    written to ``args.mergedFile`` as TSV.

    :param args: argparse-style namespace with ``flagFiles``, ``filename``,
        ``flagUniqID`` and ``mergedFile`` attributes.
    """
    # Need to take each arg and turn into a data frame and add to new list
    flagDataFrameList = []
    logger.info("Importing data")

    # Galaxy passes multiple files as one comma-separated string; if there
    # are commas, split the single entry back into a list of paths.
    if ',' in args.flagFiles[0]:
        args.flagFiles = args.flagFiles[0].split(',')

    # If args.filename is provided then use it to add its name to column
    # names. This parameter should be used only on Galaxy.
    if args.filename:
        # Cleaning weird characters on file names and replacing them with '_'.
        filenames = [cleanStr(x=fname) for fname in args.filename]

        # Convert files into dataframes and populate into new list.
        # NOTE: the loop depends on `filenames`, so it only runs when
        # args.filename was supplied.
        for flagFile, filename in zip(args.flagFiles, filenames):
            # Read table
            dataFrame = pd.read_table(flagFile)

            # Flag uniqID. set_index raises KeyError when the column is
            # missing; log it and continue with the unindexed frame
            # (preserves the original best-effort behavior, but no longer
            # swallows unrelated exceptions with a bare except).
            if args.flagUniqID:
                try:
                    dataFrame.set_index(args.flagUniqID, inplace=True)
                except KeyError:
                    logger.error("Index {0} does not exist on file.".format(args.flagUniqID))

            # Suffix every column with its (cleaned) file name so the origin
            # of each flag column survives the merge.
            dataFrame.columns = [name + "_" + filename for name in dataFrame.columns]

            # List of frames
            flagDataFrameList.append(dataFrame)

    # Merge flags using Flags class
    mergedFlags = Flags.merge(flagDataFrameList)

    # Export merged flags
    # NOTE: Pandas cannot store NaNs as an int. If there are NaNs from the
    # merge, then the column becomes a float. Format the float output to
    # look like an int.
    mergedFlags.to_csv(args.mergedFile, float_format='%.0f', sep='\t')
    logger.info("Script Complete!")
def main(args):
    """Run pairwise Bland-Altman outlier detection and emit flag tables.

    Builds the sample list (optionally restricted to ``args.processOnly``
    groups), generates every pairwise sample comparison (within-group only
    when ``args.group`` is set), plots each comparison into a multi-page
    PDF, merges the per-comparison flags, summarizes them, and writes
    sample-level and feature-level flag TSVs.

    :param args: argparse-style namespace with ``input``, ``design``,
        ``uniqID``, ``group``, ``processOnly``, ``baName``, ``distName``,
        ``sampleCutoff``, ``featureCutoff``, ``flagSample`` and
        ``flagFeature`` attributes.
    """
    # Import data
    dat = wideToDesign(args.input, args.design, args.uniqID, args.group,
                       logger=logger)

    # Get a list of samples to process; if processOnly is specified only
    # analyze the specified group(s).
    if args.processOnly:
        dat.design = dat.design[dat.design[args.group].isin(args.processOnly)]
        toProcess = dat.design.index
        dat.sampleIDs = toProcess.tolist()

    # Keep only the sampleIDs that are to be analyzed.
    dat.keep_sample(dat.sampleIDs)

    # Get list of pairwise combinations. If group is specified, only do
    # within-group combinations.
    combos = list()
    if args.group:
        # If group is given, only do within group pairwise combinations
        logger.info('Only doing within group, pairwise comparisons.')
        for groupName, dfGroup in dat.design.groupby(dat.group):
            combos.extend(list(combinations(dfGroup.index, 2)))
    else:
        logger.info('Doing all pairwise comparisons. This could take a while!')
        # Get all pairwise combinations for all samples
        combos.extend(list(combinations(dat.sampleIDs, 2)))

    # Open a multiple-page PDF for plots; the context manager guarantees the
    # PDF is closed even if flag generation raises (the original leaked the
    # open file on error).
    logger.info('Generating flags and plots.')
    with PdfPages(args.baName) as ppBA:
        # Loop over combinations, generate plots, and collect a list of
        # flags. A list comprehension (not a lazy `map`) materializes the
        # results so they can be iterated more than once downstream on
        # Python 3.
        flags = [iterateCombo(dat, combo, ppBA) for combo in combos]

    # Merge flags
    logger.info('Merging outlier flags.')
    merged = Flags.merge(flags)

    # Summarize flags
    logger.info('Summarizing outlier flags.')
    (propSample, propFeature,
     propSample_p, propFeature_p,
     propSample_c, propFeature_c,
     propSample_d, propFeature_d) = summarizeFlags(dat, merged, combos)
    plotFlagDist(propSample, propFeature, args.distName)

    # Create sample-level flags
    flag_sample = Flags(index=dat.sampleIDs)
    flag_sample.addColumn(column='flag_sample_BA_outlier',
                          mask=(propSample >= args.sampleCutoff))
    flag_sample.addColumn(column='flag_sample_BA_pearson',
                          mask=(propSample_p >= args.sampleCutoff))
    flag_sample.addColumn(column='flag_sample_BA_cooks',
                          mask=(propSample_c >= args.sampleCutoff))
    flag_sample.addColumn(column='flag_sample_BA_dffits',
                          mask=(propSample_d >= args.sampleCutoff))
    flag_sample.df_flags.index.name = "sampleID"
    flag_sample.df_flags.to_csv(args.flagSample, sep='\t')

    # Create metabolite (feature)-level flags
    flag_metabolite = Flags(dat.wide.index)
    flag_metabolite.addColumn(column='flag_feature_BA_outlier',
                              mask=(propFeature >= args.featureCutoff))
    flag_metabolite.addColumn(column='flag_feature_BA_pearson',
                              mask=(propFeature_p >= args.featureCutoff))
    flag_metabolite.addColumn(column='flag_feature_BA_cooks',
                              mask=(propFeature_c >= args.featureCutoff))
    flag_metabolite.addColumn(column='flag_feature_BA_dffits',
                              mask=(propFeature_d >= args.featureCutoff))
    flag_metabolite.df_flags.to_csv(args.flagFeature, sep='\t')

    # Finish Script
    logger.info("Script Complete!")