# Work on floats throughout so that thresholds compare numerically.
D = D.astype(float)

###Applying a threshold for the read depth
#User input and input check
q = zp.question(
    title="Mutation Filtering",
    text="Do you want to apply a threshold for read depth? Values below the threshold will be treated as absent."
)
if q:
    thresh = zp.entry(title="Mutation Filtering", text="Threshold:")
    try:
        # int() rejects non-integer text (ValueError) and a cancelled
        # dialog returning None (TypeError); anything else is a real bug
        # and must not be swallowed.
        thresh = float(int(thresh))
    except (TypeError, ValueError):
        zp.error(title="Mutation Filtering",
                 text="Invalid input! Input must be an integer number!")
        sys.exit(1)

    #Application
    for col in D.columns:
        if ":DP" in col:
            # The allele-frequency column shares the prefix before ":".
            af_col = col.split(":")[0] + ":AF"
            # Depths below the threshold become 0.0. NaN compares False
            # and is therefore left untouched, as with the former lambda.
            D[col] = D[col].mask(D[col] < thresh, 0.0)
            # Zero the allele frequency wherever the depth is 0. This is
            # assigned as a whole column instead of the former chained
            # ``D[af_col].loc[...] = 0``, which may write to a temporary
            # copy and never updates D under pandas Copy-on-Write.
            # A missing AF column still raises KeyError.
            D[af_col] = D[af_col].mask(D[col] == 0, 0)

###Applying a threshold for the allele frequency
#User input and input check
q = zp.question(
    title="Mutation Filtering",
    text="Do you want to apply a threshold for allele frequency? Values below the threshold will be treated as absent."
)
# Excerpt from a chain of checks on the user-selected diversity metric.
# For each recognised choice ("Goods_coverage", "Chao1", "Faith_PD") the code
# rebinds `metric` to a lower-case identifier and sets `label` and `mname`.
# "Faith_PD" additionally needs a tree file as the third command-line argument
# (sys.argv[3]); if reading it fails, an error dialog is shown, a termination
# banner is printed and the script exits with status 1.
# After the chain, the user is prompted for an output file format (`ftype`).
# NOTE(review): the bare `except:` around sys.argv[3] traps every exception;
#   `except IndexError:` looks like the intent - confirm.
# NOTE(review): `label` is presumably a display label and `mname` a file-name
#   stem - verify against the code that consumes them.
# NOTE(review): the first assignment belongs to a branch whose `if` header is
#   outside this excerpt, and the trailing zp.entry(...) call continues beyond it.
mname = "pielou_evenness" if metric == "Goods_coverage": metric = "goods_coverage" label = "Good's coverage" mname = "goods_coverage" if metric == "Chao1": metric = "chao1" label = "Chao1" mname = "chao1" if metric == "Faith_PD": try: tree_file = sys.argv[3] except: zp.error( title="CoMA", text= "A tree file is required for the calculation of Faith's phylogenetic diversity!" ) print("\nProcess terminated!") print( "\n________________________________________________________________________________\n" ) sys.exit(1) metric = "faith_pd" label = "Faith's PD" mname = "faith_pd" ftype = zp.entry( title="CoMA", text= "Which fileformat do you prefer?\n\neps, jpeg, pdf, png, ps, raw, rgba, svg, svgz, tiff\n"
# Tail of the template-writing step (its enclosing block starts outside this
# excerpt): one row per sample, followed by one empty tab-separated cell per
# metadata variable.
outf.write(sample + len(variables.split(",")) * "\t" + "\n")

zp.message(
    title="CoMA",
    text="Please fill in all missing data in the map file; save then as tab-delimited text file. When you are finished, a quick check will be done to prove your input.\n\nClick now OK to start entering your metadata!",
    height=100)

# Open the template in LibreOffice Calc. The file name is passed as a separate
# argument, not interpolated into a shell string, so paths containing spaces
# or shell metacharacters are handled correctly.
import subprocess
subprocess.run(["libreoffice", "--calc", outfile])

# Every row must hold the sample name plus one value per variable.
n_variables = len(variables.split(","))

with open(outfile) as outf:
    outf.readline()  # skip the header row
    first_row = outf.readline()
    # A row that does not split on tabs means the file was saved with a
    # different separator (or contains no data rows at all).
    if len(first_row.strip().split("\t")) == 1:
        zp.error(
            title="CoMA",
            text="ATTENTION: Your map file is not tab-delimited! Please repeat this step and save the text file correctly using 'tab' as separator."
        )
        print("\nProcess terminated!")
        print(
            "\n________________________________________________________________________________\n"
        )
        # NOTE(review): exit status 0 on a failed check is kept as-is because
        # a calling wrapper may depend on it - confirm whether non-zero is wanted.
        sys.exit(0)
    errors = 0
    # The first data row was consumed by the delimiter check above; put it
    # back so that it is validated like every other row.
    for line in [first_row] + outf.readlines():
        # Count only cells with content. The former filter (x != "\t") could
        # never match after split("\t"), so empty cells in the middle of a
        # row went unnoticed.
        filled = [cell for cell in line.strip().split("\t") if cell.strip()]
        if len(filled) <= n_variables:
            zp.error(title="CoMA",
                     text="ATTENTION: This line includes empty cells:\n\n" +
                     line)
            errors += 1
# Excerpt. The first two statements are the tail of a helper whose header is
# outside this view: it adds the stripped entry taxa[j] to the set tax_list[j]
# and returns tax_list.
# The script part then:
#   * takes the input file from sys.argv[1];
#   * asks whether samples should be grouped using the mapping file;
#   * if so, requires the mapping file as sys.argv[2] (error dialog, banner and
#     exit status 1 when it cannot be read from argv), loads it with pandas as
#     a tab-delimited table indexed by its first column, and asks for the
#     metadata column (`var`) to group by;
#   * rejects a column with more than three distinct values (the pz.error(...)
#     call for that case continues beyond this excerpt).
# NOTE(review): `var` is used to index map_DF directly after the prompt; a
#   cancelled dialog or a mistyped column name would raise KeyError here -
#   confirm whether validation is wanted.
# NOTE(review): the bare `except:` around sys.argv[2] traps every exception;
#   `except IndexError:` looks like the intent - confirm.
tax_list[j].add(taxa[j].strip()) return tax_list input_file = sys.argv[1] mapping = pz.question( title="CoMA", text= "Do you want to use the information in the mapping file to group your samples?" ) if mapping == True: try: map_file = sys.argv[2] except: pz.error(title="CoMA", text="No mapping file provided, process terminated!") print("\nNo mapping file provided, process terminated!") print( "\n________________________________________________________________________________\n" ) sys.exit(1) map_DF = pd.read_csv(map_file, delimiter="\t", index_col=0) var = pz.entry( title="CoMA", text= "Based on which metadata variable do you want to group your samples?\n\nYou can select between the following variables:\n\n" + ", ".join(map_DF.columns) + "\n") if len(map_DF[var].value_counts()) > 3: pz.error( title="CoMA", text=
# Excerpt from a chain of checks on the user-selected distance metric.
# For "Braycurtis" and "Dice" the code rebinds `metric` to the lower-case
# identifier and sets `mname` to the same string (the leading "chebyshev"
# assignments belong to a branch whose `if` header is outside this excerpt).
# "weighted_unifrac" / "unweighted_unifrac" need a tree file as sys.argv[3];
# "Minkowski" reads its p-norm from sys.argv[3] and converts it with int().
# In both cases a failure shows an error dialog and prints a termination
# banner; the Unifrac branch then exits with status 1 (the Minkowski branch
# continues beyond this excerpt).
# NOTE(review): the Minkowski message demands a *positive* integer, but the
#   visible code only checks that int() succeeds; zero or negative values pass
#   this check - confirm whether they are rejected later.
# NOTE(review): the bare `except:` clauses trap every exception; IndexError
#   (and ValueError for the int() conversion) look like the intent - confirm.
metric = "chebyshev" mname = "chebyshev" if metric == "Braycurtis": metric = "braycurtis" mname = "braycurtis" if metric == "Dice": metric = "dice" mname = "dice" if metric == "weighted_unifrac" or metric == "unweighted_unifrac": try: tree_file = sys.argv[3] except: zp.error( title="CoMA", text= "A tree file is required for the calculation of the Unifrac distance!" ) print("\nProcess terminated!") print( "\n________________________________________________________________________________\n" ) sys.exit(1) if metric == "Minkowski": try: p = int(sys.argv[3]) except: zp.error(title="CoMA", text="P-norm must be a positive integer!") print("\nProcess terminated!") print(
#
# USAGE: python group.py otu_table_file
import zenipy as pz
import sys
import os
import statistics
import time

# Ask how the reads of the samples in one group are to be combined.
method = pz.entry(
    title="CoMA",
    text="Please choose the method for grouping:\n\nSum: sum of all reads\nMean: mean of all reads\nMedian: median of all reads\n")

# No answer (None) from the dialog: stop quietly, without an error dialog.
if method is None:
    print("\nGrouping process terminated!")
    print("\n________________________________________________________________________________\n")
    sys.exit(3)

# Any answer other than the three offered methods is rejected.
if method not in ("Sum", "Mean", "Median"):
    pz.error(title="CoMA", text="Invalid Input, process terminated!")
    print("\nGrouping process terminated!")
    print("\n________________________________________________________________________________\n")
    sys.exit(3)

# NOTE(review): the usage line mentions a file argument, but the paths are
# fixed here - confirm which is intended.
input_file = "otu_table.txt"
output_file = "otu_table_out.txt"

with open(input_file) as inf:
    inf.readline()  # the first line is skipped; the second one is the header
    header_fields = inf.readline().strip().split("\t")
    # Running numbers "1\t", "2\t", ... - one per sample column. The first and
    # the last header column are not treated as samples.
    num = [str(b) + "\t" for b in range(1, len(header_fields[1:-1]) + 1)]
    # Numbered sample list, one "<number>\t<sample name>\n" entry per sample.
    samples = [
        str(position) + "\t" + str(name.strip()) + "\n"
        for position, name in enumerate(header_fields[1:-1], start=1)
    ]
    count = 0
    groups = []