def get_variants_in_samples(gemini_db, samples, annotations, min_allele_freq,
                            min_alt_depth, min_depth, max_aaf_all,
                            somatic=False):
    """
    Returns a dataframe with variants from all samples.

    Variants are fetched one sample at a time with
    gem_ops.get_somatic_vars_in_sample (when somatic is true) or
    gem_ops.get_vars_in_sample (otherwise). The per-sample results are
    concatenated, passed through gem_ops.convert_cols and given a fresh
    0..n-1 index. The columns "id", "plate", "tissue" and "replicate" are
    then inserted at positions 1-4, filled from split_id() applied to each
    row's "sample" value (split_id is expected to yield those four fields
    in that order).

    gemini_db, annotations and the four thresholds (min_allele_freq,
    min_alt_depth, min_depth, max_aaf_all) are forwarded unchanged to the
    per-sample query function. For every sample, its name, the len() of its
    result and the elapsed query time in seconds are written to stderr.

    Raises ValueError if samples is empty.
    """
    # Materialize once: samples is iterated twice (log line and query loop),
    # so a generator would otherwise be exhausted before any query runs.
    samples = list(samples)
    if not samples:
        # pd.concat() raises an opaque "No objects to concatenate" ValueError
        # on an empty list; fail early with the same exception type instead.
        raise ValueError("No samples to process.")

    if somatic:
        get_vars_fn = gem_ops.get_somatic_vars_in_sample
    else:
        get_vars_fn = gem_ops.get_vars_in_sample

    # sys.stderr.write() works on both Python 2 and 3. The double space
    # reproduces exactly what the former print >> statement emitted.
    sys.stderr.write("Samples to process:  %s\n" % ", ".join(samples))

    all_vars = []
    for sample in samples:
        start = time.time()
        sample_vars = get_vars_fn(gemini_db, annotations, sample,
                                  min_allele_freq=min_allele_freq,
                                  min_alt_depth=min_alt_depth,
                                  min_depth=min_depth,
                                  max_aaf_all=max_aaf_all)
        elapsed = time.time() - start
        all_vars.append(sample_vars)
        sys.stderr.write("%s %s %s\n" % (sample, len(sample_vars), elapsed))

    # Combine all variants together and reset index so adding metadata is easy.
    all_vars_df = gem_ops.convert_cols(pd.concat(all_vars))
    all_vars_df.reset_index(inplace=True, drop=True)

    # Using sample column, create and populate columns for id, plate, tissue,
    # and replicate.
    attr_cols = ["id", "plate", "tissue", "replicate"]
    sample_attrs = pd.DataFrame(list(all_vars_df["sample"].apply(split_id)),
                                columns=attr_cols)
    for i, col in enumerate(attr_cols):
        # Both frames share the same 0..n-1 index, so inserting the Series
        # directly aligns row-for-row; no empty placeholder column is needed.
        all_vars_df.insert(i + 1, col, sample_attrs[col])

    return all_vars_df
# Write results to file. if sample_pattern == ".*": sample_pattern = "all" out_filename = "find_vars_results_%s_minaf%.2f_ad%i_d%i.txt" % (sample_pattern, min_allele_freq, min_alt_depth, min_depth) out_file = open(out_filename, "w") out_file.write( all_vars_df.to_csv(sep="\t", index=False, float_format='%.3f') ) out_file.close() print "Wrote results to file %s" % out_filename elif operation == "augment_vars": # Augment variants with updated and joint information. # Read results into dataframe. results_df = gem_ops.convert_cols( pd.read_csv(results_file, sep="\t") ) results_df = filter_and_augment_variants(results_df, min_allele_freq, min_alt_depth, min_depth, max_num_het, tissue, add_joint_cols) # Print augmented results. augmented_out_file = open("augmented_" + results_file, "w") augmented_out_file.write(results_df.to_csv(sep="\t", index=False)) augmented_out_file.close() print "Wrote augmented variants to file %s" % augmented_out_file # Print joint variants. if add_joint: joint_out_file = open("joint_" + results_file, "w") print_joint_variants(results_df, joint_out_file, all_cols=True) joint_out_file.close()