Example #1
0
def get_replicates(gemini_db):
    """
    Build a dataframe with one row per sample found via gemini_db.

    Each row starts with the sample name exactly as returned by
    gem_ops.get_samples(), followed by the values that split_id() yields
    for that name. The resulting columns are, in order:
    full_name, sample, plate, tissue, replicate
    """

    column_names = ["full_name", "sample", "plate", "tissue", "replicate"]

    # Assemble the table row by row: raw name first, then its split-out parts.
    rows = []
    for full_name in gem_ops.get_samples(gemini_db):
        id_parts = list(split_id(full_name))
        rows.append([full_name] + id_parts)

    return pd.DataFrame(rows, columns=column_names)
Example #2
0
    min_allele_freq = args.min_allele_freq
    min_depth = args.min_depth
    min_alt_depth = args.min_alt_depth
    max_num_het = args.max_num_het
    max_aaf_all = args.max_aaf_all
    tissue = args.tissue.split(",")
    sample_pattern = args.sample_pattern
    annotations = args.annotations.split(",")
    results_file = args.results_file
    add_joint = args.add_joint

    if operation == "find_all":
        # Get all variants in a set of samples.

        # Get samples to process.
        samples = [sample for sample in gem_ops.get_samples(gemini_db) if re.search(sample_pattern, sample) > 0]

        # Get sample variants.
        all_vars_df = get_variants_in_samples(gemini_db, samples, annotations, min_allele_freq, min_alt_depth, min_depth, max_aaf_all, somatic=False)

        # Write results to file.
        if sample_pattern == ".*":
            sample_pattern = "all"
        out_filename = "find_vars_results_%s_minaf%.2f_ad%i_d%i.txt" % (sample_pattern, min_allele_freq, min_alt_depth, min_depth)
        out_file = open(out_filename, "w")
        out_file.write( all_vars_df.to_csv(sep="\t", index=False, float_format='%.3f') )
        out_file.close()

        print "Wrote results to file %s" % out_filename

    elif operation == "augment_vars":