def dictor(breed, cdfs):
    """
    @summary: Groups the experiment files whose name mentions a breed.

    Only entries of cdfs whose string form contains str(breed) are used.
    Each selected file name is split on '-' and its second field is the
    grouping key. The split fields and the key are printed as debug output.

    @param breed: Value whose string form must occur in a file name for
        that file to be selected
    @param cdfs: Iterable of experiment file names (strings)
    @return: breed_dict, dictionary mapping each key to the list of
        load_expr() results for its files
    @return: name_dict, dictionary mapping each key to the list of the
        file names themselves, in the same order

    Raises IndexError when a selected file name contains no '-'.
    """
    breed_dict = {}
    name_dict = {}
    wanted = str(breed)
    for path in cdfs:
        if wanted not in str(path):
            continue
        fields = path.split('-')
        print('a=', fields)
        key = fields[1]
        print('b[1]=', key)
        # Load before touching the dictionaries so a failing load leaves
        # both of them without a half-registered entry.
        expr = load_expr(path)
        name_dict.setdefault(key, []).append(path)
        breed_dict.setdefault(key, []).append(expr)
    return breed_dict, name_dict
def rep_dict(cdfs):
    """
    @summary: Creates two dictionaries with identical keys, one holding
        the loaded experiments and one holding the file names they were
        loaded from. These dictionaries allow for the replicate grouping
        alignment.

    The key of a file is taken from its name: the name is split on '-',
    the fourth field is split on '_', and the first piece is the key.
    The split fields of every file and the final experiment dictionary
    are printed as debug output.

    @param cdfs: List of .expr files to be aligned with each other
    @return: breed_dict, dictionary mapping each key to the list of
        loaded experiments
    @return: name_dict, dictionary mapping each key to the list of file
        names associated to those experiments, in the same order

    Raises IndexError when a file name has fewer than four '-' fields.
    """
    breed_dict = {}
    name_dict = {}
    for path in cdfs:
        fields = path.split('-')
        print(fields)
        key = fields[3].split('_')[0]
        # Load before touching the dictionaries so a failing load leaves
        # both of them without a half-registered entry.
        expr = load_expr(path)
        name_dict.setdefault(key, []).append(path)
        breed_dict.setdefault(key, []).append(expr)
    print(breed_dict)
    return breed_dict, name_dict
def alifeGuru(listem, folder_expr):
    """
    @summary: Loads groups of experiment files, builds an alignment per
        group, runs the pairwise/tree alignment and writes the result to
        two fixed CSV files.

    @param listem: Iterable of groups; each group is an iterable of
        experiment file names relative to folder_expr
    @param folder_expr: Directory the file names are joined onto
    @return: None; results are written to disk and progress is printed
    """
    # within replicates alignment parameters
    Dw = 2.0  # rt modulation [s]
    Gw = 0.30  # gap penalty

    # do the alignment
    trees = []
    expr_list = []
    expr_dir = folder_expr
    for gru_list in listem:
        for item in gru_list:
            print('Aligning...' + item)
            file_name = os.path.join(expr_dir, item)
            expr = load_expr(file_name)
            expr_list.append(expr)
        # NOTE(review): expr_list is never cleared, so each F1 also covers
        # the experiments of all earlier groups -- confirm this is intended.
        F1 = exprl2alignment(expr_list)
        print('F1' + '\n')
        print(F1)
        trees.append(F1)
    for t in trees:
        T1 = PairwiseAlignment(t, Dw, Gw)
        print('T1' + '\n')
        print(T1)
        A1 = align_with_tree(T1, min_peaks=2)
        # NOTE(review): the output paths are fixed, so every pass rewrites
        # the same two files and only the last alignment survives.
        A1.write_csv(
            '/home/cocopalacelove/Desktop/StrawberryExotic/output/Alignments/fullSpec_rt.csv',
            '/home/cocopalacelove/Desktop/StrawberryExotic/output/Alignments/fullSpec_area.csv'
        )
    print(trees)
def main():
    """
    Collect the experiment files of each listed breed, load them and pass
    the per-breed lists of loaded experiments to aligner().

    The experiment folder and the breed names are fixed inside the
    function; nothing is read from the command line.
    """
    folder_exprs = '/home/juicebox/Desktop/StrawberryExotic/parameters_v/bigFold'
    llamas = [
        'Alexandria', 'Bucharica', 'Capron', 'Tortona', 'Mara', 'Mignonette',
        'Strawberry', 'Viridis'
    ]
    listem = []  # one list of matching file names per breed
    bigF = []  # one list of loaded experiments per breed
    for name in llamas:
        nomo = '*' + name + '*' + '.cdf.expr'
        print('nomo=', nomo)
        # glob() is called with keyword arguments and unpacked into two
        # values, so it is not the standard-library glob.glob --
        # presumably a project helper; only its first result is used here.
        list_of_expr, names = glob(glob_pattern=nomo,
                                   directoryname=folder_exprs)
        print('LoE=', list_of_expr)
        print('name=', name)
        listem.append(list_of_expr)
    for var in listem:
        print(var)
        expr_loaded = []
        for straw in var:
            expr = load_expr(straw)
            expr_loaded.append(expr)
        print(len(expr_loaded))
        bigF.append(expr_loaded)
    print('bigF=', bigF)
    aligner(bigF)
    print('Done!')
def load_expr_list_from_runlist(runs):
    """
    @summary: Loads the experiment stored for each run name.

    Every run name is turned into the path <expr_dir>/<run>.expr and
    passed to load_expr(). expr_dir is not a parameter: it is looked up
    in the enclosing (module) scope when the function runs.

    @param runs: Iterable of run names (file names without ".expr")
    @return: List of the load_expr() results, in the same order as runs
    """
    return [load_expr(os.path.join(expr_dir, run + ".expr")) for run in runs]
def load_expr_list():
    """
    @summary: Loads every experiment found in the experiment directory.

    The directory is read from the module-level name exprdir; all files
    in it matching '*.expr' are loaded with load_expr(). Each matched
    path is printed before it is loaded.

    @return: List of the load_expr() results, in the order glob.glob()
        yields the paths
    """
    el = []
    for path in glob.glob(os.path.join(exprdir, '*.expr')):
        # print() with a single argument behaves the same on Python 2 and 3
        print(path)
        el.append(load_expr(path))
    return el
def load_expr_list():
    """
    @summary: Loads every experiment found in the experiment directory.

    The directory is read from the module-level name exprdir; all files
    in it matching '*.expr' are loaded with load_expr(). Each matched
    path is printed before it is loaded.

    @return: List of the load_expr() results, in the order glob.glob()
        yields the paths
    """
    el = []
    for path in glob.glob(os.path.join(exprdir, '*.expr')):
        # print() with a single argument behaves the same on Python 2 and 3
        print(path)
        el.append(load_expr(path))
    return el
def lil_dictor(cdfs):
    """
    @summary: Groups experiment files by the second '-'-separated field
        of their name.

    Every file in cdfs is loaded with load_expr(). The split fields and
    the key of each file, and the final experiment dictionary, are
    printed as debug output.

    @param cdfs: Iterable of experiment file names (strings)
    @return: breed_dict, dictionary mapping each key to the list of
        loaded experiments
    @return: name_dict, dictionary mapping each key to the list of the
        file names themselves, in the same order

    Raises IndexError when a file name contains no '-'.
    """
    breed_dict = {}
    name_dict = {}
    for path in cdfs:
        fields = path.split('-')
        print(fields)
        key = fields[1]
        print(key)
        # Load before touching the dictionaries so a failing load leaves
        # both of them without a half-registered entry.
        expr = load_expr(path)
        name_dict.setdefault(key, []).append(path)
        breed_dict.setdefault(key, []).append(expr)
    print(breed_dict)
    return breed_dict, name_dict
def alife(
        exprZ,
        folder_expr,
        rt_csv_path='/home/cocopalacelove/Desktop/StrawberryExotic/output/Alignments/Allstar10_rt.csv',
        area_csv_path='/home/cocopalacelove/Desktop/StrawberryExotic/output/Alignments/Allstar10_area.csv'):
    """
    @summary: Loads a set of experiments, aligns them and writes the
        alignment to two CSV files.

    @param exprZ: Iterable of experiment file names relative to folder_expr
    @param folder_expr: Directory the file names are joined onto
    @param rt_csv_path: First file name handed to write_csv(); defaults
        to the location that used to be hard-coded
    @param area_csv_path: Second file name handed to write_csv();
        defaults to the location that used to be hard-coded
    @return: None; results are written to disk and progress is printed
    """
    # within replicates alignment parameters
    Dw = 10.0  # rt modulation [s]
    Gw = 0.30  # gap penalty

    # load every experiment that takes part in the alignment
    expr_list = []
    for expr_code in exprZ:
        print('Aligning...' + expr_code)
        expr_list.append(load_expr(os.path.join(folder_expr, expr_code)))

    # do the alignment
    F1 = exprl2alignment(expr_list)
    print('F1' + '\n')
    print(F1)
    T1 = PairwiseAlignment(F1, Dw, Gw)
    print('T1' + '\n')
    print(T1)
    A1 = align_with_tree(T1, min_peaks=2)
    A1.write_csv(rt_csv_path, area_csv_path)
# default, float masses with interval (bin interval) of one from min mass
# (print() with a single argument behaves the same on Python 2 and 3)
print("default intensity matrix, bin interval = 1, boundary +/- 0.5")
im = build_intensity_matrix(data)
# null out masses 73 and 147 before any further processing
im.null_mass(73)
im.null_mass(147)

# Replace every ion chromatogram by its smoothed (savitzky_golay) and
# baseline-corrected (tophat) version
n_scan, n_mz = im.get_size()
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# Load the experiment
exper = load_expr(expr_file)

# Load the peak list
peak_list = exper.get_peak_list()

# Pass Ion Chromatograms into a list of ICs
# (from here on `ic` is the list, no longer a single chromatogram)
n_mz = len(im.get_mass_list())
ic = []
for m in range(n_mz):
    ic.append(im.get_ic_at_index(m))

# Create a new display object, this time plot four ICs
# and the TIC, as well as the peak list
display = Display()
# define the input experiments list
exprA_codes = ["a0806_077", "a0806_078", "a0806_079"]
exprB_codes = ["a0806_140", "a0806_141", "a0806_142"]

# within replicates alignment parameters
Dw = 2.5  # rt modulation [s]
Gw = 0.30  # gap penalty

# do the alignment
# (print() with a single argument behaves the same on Python 2 and 3)
print('Aligning expt A')
expr_list = []
expr_dir = "../61a/output/"
for expr_code in exprA_codes:
    file_name = os.path.join(expr_dir, expr_code + ".expr")
    expr = load_expr(file_name)
    expr_list.append(expr)
F1 = exprl2alignment(expr_list)
T1 = PairwiseAlignment(F1, Dw, Gw)
A1 = align_with_tree(T1, min_peaks=2)

# write the common-ion list of the expt A alignment
top_ion_list = A1.common_ion()
A1.write_common_ion_csv('output/area2.csv', top_ion_list)

print('Aligning expt B')
# start a fresh list: expt B is loaded from its own directory
expr_list = []
expr_dir = "../61b/output/"
for expr_code in exprB_codes:
    file_name = os.path.join(expr_dir, expr_code + ".expr")
    expr = load_expr(file_name)
    expr_list.append(expr)
def main():
    """
    Command-line entry point: load every '*.expr' file of a directory and
    align them in a single run.

    Options (all optional):
      -e  directory searched for '*.expr' files
      -o  directory handed to singleAlign() for the .csv output
      -m  modulation time (float, default 2.5; 1 when given without a value)
      -g  gap penalty (float, default 0.30; 1 when given without a value)
      -p  minimum number of peaks (int, default 2; 1 when given without a value)
      -n  name tag handed to singleAlign()

    The parsed arguments and every loaded file name are printed.
    """
    parser = argparse.ArgumentParser(
        description="Peak alignment tool for GC-MS .expr experiment files")
    parser.add_argument(
        "-e",
        action="store",
        dest="exprDir",
        nargs="?",
        type=str,
        default="/workdir2/cpowell/rasp2018/",
        # the help text used to advertise '/tmp/' as the default
        help=
        "Location of .expr files to be aligned; Default= '/workdir2/cpowell/rasp2018/' "
    )
    parser.add_argument(
        "-o",
        action="store",
        dest="opDir",
        nargs="?",
        type=str,
        default="/tmp/",
        help="location to store the alignment .csv output file",
    )
    parser.add_argument(
        "-m",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=2.5,
        help=
        "Modulation time allowed between peaks considered similar; Default=2.5",
        dest="mod",
    )
    parser.add_argument(
        "-g",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=0.30,
        help="Gap penalty; Default=0.30",
        dest="gap",
    )
    parser.add_argument(
        "-p",
        action="store",
        nargs="?",
        const=1,
        type=int,
        default=2,
        help=
        "Minimum number of peaks per sample required for alignment; Default=2",
        dest="minPeak",
    )
    parser.add_argument(
        "-n",
        action="store",
        nargs="?",
        type=str,
        default="alignment",
        # the help text used to describe a peak-detection window size
        help="Identifier string used for .csv file storage; Default='alignment'",
        dest="nameTag",
    )
    args = parser.parse_args()
    print(args)

    # glob() is called with keyword arguments and returns two values, so
    # it is not the standard-library glob.glob; only the first is used.
    list_of_exprs, names = glob(glob_pattern='*.expr',
                                directoryname=args.exprDir)
    expr_loaded = []
    for path in list_of_exprs:
        print(path)
        expr_loaded.append(load_expr(path))

    singleAlign(expr_loaded, args.opDir, args.mod, args.gap, args.minPeak,
                args.nameTag)
    print('Done!')
# define the input experiments list
exprA_codes = ["a0806_077", "a0806_078", "a0806_079"]
exprB_codes = ["a0806_140", "a0806_141", "a0806_142"]

# within replicates alignment parameters
Dw = 2.5  # rt modulation [s]
Gw = 0.30  # gap penalty

# do the alignment
# (print() with a single argument behaves the same on Python 2 and 3)
print('Aligning expt A')
expr_list = []
expr_dir = "../61a/output/"
for expr_code in exprA_codes:
    file_name = os.path.join(expr_dir, expr_code + ".expr")
    expr = load_expr(file_name)
    expr_list.append(expr)
F1 = exprl2alignment(expr_list)
T1 = PairwiseAlignment(F1, Dw, Gw)
A1 = align_with_tree(T1, min_peaks=2)

# write the common-ion list of the expt A alignment
top_ion_list = A1.common_ion()
A1.write_common_ion_csv('output/area2.csv', top_ion_list)

print('Aligning expt B')
# start a fresh list: expt B is loaded from its own directory
expr_list = []
expr_dir = "../61b/output/"
for expr_code in exprB_codes:
    file_name = os.path.join(expr_dir, expr_code + ".expr")
    expr = load_expr(file_name)
    expr_list.append(expr)
def main():
    """
    Command-line entry point of the peak alignment tool.

    All '*.expr' files of the -e directory are collected, then one of two
    strategies selected with -as is run:

      inner    every file is loaded and handed to singleAlign() together
               with the -o, -m, -g, -p and -n values
      between  the file names are grouped with rep_dict() and handed to
               repAlign(), which additionally receives the second set of
               tolerances (-m2, -g2, -p2)

    The numeric options fall back to 1 when the flag is given without a
    value (const=1). The parsed arguments are printed before the run.
    """
    parser = argparse.ArgumentParser(
        description=
        "Peak alignment tool with adjustable tolerances & grouping strategies")
    parser.add_argument(
        "-e",
        action="store",
        dest="exprDir",
        nargs="?",
        type=str,
        default="/workdir2/cpowell/rasp2018/",
        help=
        "Location of .expr files to be aligned; Default= '/workdir2/cpowell/rasp2018/' "
    )
    parser.add_argument(
        "-o",
        action="store",
        dest="opDir",
        nargs="?",
        type=str,
        default="/tmp/",
        help=
        "location to store the alignment .csv output file; Default= '/tmp/' ",
    )
    parser.add_argument(
        "-m",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=2.5,
        help=
        "Retention time tolerance value between compared peaks; Default=2.5",
        dest="mod",
    )
    parser.add_argument(
        "-m2",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=2.5,
        help=
        "*Between-state: 2nd Retention time tolerance value between compared peaks; Default=2.5",
        dest="mod2",
    )
    parser.add_argument(
        "-g",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=0.30,
        help="Gap penalty for a non-aligning peak; Default=0.30",
        dest="gap",
    )
    parser.add_argument(
        "-g2",
        action="store",
        nargs="?",
        const=1,
        type=float,
        default=0.30,
        help=
        "*Between-state: 2nd gap penalty for a non-aligning peak; Default=0.30",
        dest="gap2",
    )
    parser.add_argument(
        "-p",
        action="store",
        nargs="?",
        const=1,
        type=int,
        default=2,
        help=
        "Minimum number of peaks per sample required for alignment; Default=2",
        dest="minPeak",
    )
    parser.add_argument(
        "-p2",
        action="store",
        nargs="?",
        const=1,
        type=int,
        default=2,
        help=
        "*Between-state: 2nd minimum number of peaks per sample required for alignment; Default=2",
        dest="minPeak2")
    parser.add_argument(
        "-n",
        action="store",
        nargs="?",
        type=str,
        default="alignment-",
        help=
        "Identifier string used for .csv file storage; Default='alignment-'",
        dest="nameTag")
    parser.add_argument(
        "-as",
        choices=['inner', 'between'],
        type=str,
        default="inner",
        help=
        "Alignment strategies: inner-state, between-state; Default='inner' ",
        dest='alignS')
    args = parser.parse_args()
    print(args)

    # glob() is called with keyword arguments and returns two values, so
    # it is not the standard-library glob.glob; only the first is used.
    list_of_exprs, names = glob(glob_pattern='*.expr',
                                directoryname=args.exprDir)

    if args.alignS == "inner":
        print('Singular alignment run')
        expr_loaded = [load_expr(path) for path in list_of_exprs]
        singleAlign(expr_loaded, args.opDir, args.mod, args.gap,
                    args.minPeak, args.nameTag)
        print('Done!')
    elif args.alignS == "between":
        print('Between-state alignment run')
        # rep_dict() also returns the grouped file names; only the grouped
        # experiments are needed for the alignment.
        berries, name = rep_dict(list_of_exprs)
        repAlign(berries, args.opDir, args.mod, args.mod2, args.gap,
                 args.gap2, args.minPeak, args.minPeak2, args.nameTag)
        print('Done!')