def getassembly(args, parsedict):
    """
    Load an existing Assembly, or create a new one, and apply the params
    from parsedict to it. Does not launch ipcluster.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. Reads ``args.steps`` (tested with
        ``'1' in args.steps``), ``args.force`` and, for error reporting
        only, ``args.params``.
    parsedict : dict
        Maps param names to values. Must contain 'project_dir' and
        'assembly_name'.

    Returns
    -------
    The Assembly object, after every entry of parsedict was passed to
    its ``set_params``.

    Raises
    ------
    IPyradWarningExit
        If loading the assembly fails and step 1 was not requested.
    SystemExit
        With status -1 if ``set_params`` raises IndexError on a param.
    """
    ## Creating an assembly with a full path in the name will "work"
    ## but it is potentially dangerous, so here we have assembly_name
    ## and assembly_file: name is used for creating a new assembly in
    ## cwd, file is used for loading an existing one.
    project_dir = ip.core.assembly._expander(parsedict['project_dir'])
    assembly_name = parsedict['assembly_name']
    assembly_file = os.path.join(project_dir, assembly_name)

    ## Assembly creation will handle error checking on
    ## the format of the assembly_name

    ## Make sure the working directory exists. makedirs (rather than
    ## mkdir) so that a nested project_dir whose parent is missing is
    ## created instead of raising OSError.
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    try:
        ## If step 1 and force then go ahead and create a new assembly
        if ('1' in args.steps) and args.force:
            data = ip.Assembly(assembly_name, cli=True)
        else:
            data = ip.load_json(assembly_file, cli=True)
            data._cli = True

    except IPyradWarningExit:
        ## No usable assembly: only step 1 is allowed to start from scratch.
        if '1' not in args.steps:
            raise IPyradWarningExit(\
                " Error: You must first run step 1 on the assembly: {}"\
                .format(assembly_file))
        ## create a new assembly object
        data = ip.Assembly(assembly_name, cli=True)

    ## for entering some params...
    for param in parsedict:

        ## trap assignment of assembly_name since it is immutable.
        if param == "assembly_name":
            ## Only forward the name when it differs from the loaded one.
            ## NOTE(review): set_params is presumably what rejects the
            ## rename -- confirm in Assembly.set_params.
            if parsedict[param] != data.name:
                data.set_params(param, parsedict[param])
        else:
            ## all other params should be handled by set_params
            try:
                data.set_params(param, parsedict[param])
            except IndexError:
                print(" Malformed params file: {}".format(args.params))
                print(" Bad parameter {} - {}".format(param, parsedict[param]))
                sys.exit(-1)
    return data
def _read_sample_names(seqfile):
    """
    Return the first whitespace-delimited token of every non-blank line
    of seqfile that follows its first (header) line.

    An empty file yields an empty list.
    """
    with open(seqfile, 'r') as infile:
        ## The first line is discarded, not parsed as a sample. The
        ## builtin next() with a default works on Python 2 and 3 and
        ## does not raise on an empty file.
        next(infile, None)
        ## Blank lines carry no name; skipping them avoids an IndexError.
        return [line.split()[0] for line in infile if line.strip()]


def main():
    """
    Command-line entry point for svd4tet.

    Prints a banner, parses the command line, obtains an Assembly (loaded
    from ``args.json``, or created from ``args.output`` and ``args.seq``),
    records the ipcluster settings on it, starts ipcluster through
    ``ipcontroller_init`` and runs ``ipa.svd4tet.run`` via the Assembly's
    ``_clientwrapper``.

    Raises
    ------
    IPyradWarningExit
        If no JSON file is given and ``args.output`` or ``args.seq`` is
        missing.
    """
    ## not in ipython
    ip.__interactive__ = 0

    header = \
        "\n --------------------------------------------------"+\
        "\n Analysis tools for ipyrad [v.{}]".format(ip.__version__)+\
        "\n svd4tet -- fast quartet and tree inference "+\
        "\n --------------------------------------------------"
    print(header)

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## if JSON, load it
    if args.json:
        data = ip.load_json(args.json)
        ## NOTE(review): self-assignment kept from the original; its only
        ## visible effect is to fail early if outfiles.svdinput is absent.
        data.outfiles.svdinput = data.outfiles.svdinput

    ## else create a tmp assembly for the seqarray
    else:
        if not args.output:
            raise IPyradWarningExit(" -o output_prefix required")
        if not args.seq:
            raise IPyradWarningExit(" -s sequence file required")

        ## create new JSON (Assembly) object
        data = ip.Assembly(args.output, quiet=True)
        data.outfiles.svdinput = args.seq
        data.set_params(1, "./")

        ## parse samples from the sequence file and store them as
        ## Samples in the Assembly
        names = _read_sample_names(args.seq)
        data.samples = {name: ip.Sample(name) for name in names}

    ## store ipcluster info
    data._ipcluster["cores"] = args.cores
    if args.MPI:
        data._ipcluster["engines"] = "MPI"
    else:
        data._ipcluster["engines"] = "Local"

    ## launch ipcluster and register for later destruction
    data = ipcontroller_init(data)

    ## run svd4tet; a separate name keeps the parsed `args` namespace intact
    svd_args = [data, args.boots, args.method, args.nquartets, args.force]
    ## NOTE(review): the meaning of the trailing 45 is not visible here --
    ## confirm against Assembly._clientwrapper.
    data._clientwrapper(ipa.svd4tet.run, svd_args, 45)
def getassembly(args, parsedict):
    """
    Load an existing Assembly, or create a new one, and apply the params
    from parsedict to it. Does not launch ipcluster.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. Reads ``args.steps`` (tested with
        ``'1' in args.steps``), ``args.force`` and, for error reporting
        only, ``args.params``.
    parsedict : dict
        Maps param keys to values. Key '0' holds the assembly name and
        key '1' the project directory.

    Returns
    -------
    The Assembly object, after every entry of parsedict was passed to
    its ``set_params``.

    Raises
    ------
    IPyradWarningExit
        If loading the assembly fails and step 1 was not requested.
    SystemExit
        With status -1 if ``set_params`` raises IndexError on a param.
    """
    ## Creating an assembly with a full path in the name will "work"
    ## but it is potentially dangerous, so here we have assembly_name
    ## and assembly_file: name is used for creating a new assembly in
    ## cwd, file is used for loading an existing one.
    project_dir = ip.core.assembly._expander(parsedict['1'])
    assembly_name = parsedict['0']
    assembly_file = os.path.join(project_dir, assembly_name)

    ## Assembly creation will handle error checking on
    ## the format of the assembly_name

    ## Make sure the working directory exists. makedirs (rather than
    ## mkdir) so that a nested project_dir whose parent is missing is
    ## created instead of raising OSError.
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    try:
        ## If step 1 and force then go ahead and create a new assembly
        if ('1' in args.steps) and args.force:
            data = ip.Assembly(assembly_name)
        else:
            data = ip.load_json(assembly_file)

    except IPyradWarningExit:
        ## No usable assembly: only step 1 is allowed to start from scratch.
        if '1' not in args.steps:
            raise IPyradWarningExit(""" Error: Steps >1 ({}) requested but no current assembly found - {} """.format(args.steps, assembly_file))
        ## create a new assembly object
        data = ip.Assembly(assembly_name)

    ## for entering some params...
    for param in parsedict:

        ## trap assignment of assembly_name (key '0') since it is immutable.
        if param == str(0):
            ## only pass to set_params if user tried to change assembly_name
            ## it will raise an Exit error
            if parsedict[param] != data.name:
                data.set_params(param, parsedict[param])
        else:
            ## all other params should be handled by set_params
            try:
                data.set_params(param, parsedict[param])
            except IndexError:
                print(" Malformed params file: {}".format(args.params))
                print(" Bad parameter {} - {}".format(param, parsedict[param]))
                sys.exit(-1)
    return data
def getassembly(args, parsedict):
    """
    Load an existing Assembly, or create a new one, and apply the params
    from parsedict to it. Does not launch ipcluster.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. Reads ``args.steps`` (tested with
        ``'1' in args.steps``) and ``args.force``.
    parsedict : dict
        Maps param keys to values. Key '0' holds the assembly name and
        key '1' the project directory.

    Returns
    -------
    The Assembly object, after every entry of parsedict was passed to
    its ``set_params``.

    Raises
    ------
    IPyradWarningExit
        If loading the assembly fails and step 1 was not requested.
    """
    ## Creating an assembly with a full path in the name will "work"
    ## but it is potentially dangerous, so here we have assembly_name
    ## and assembly_file: name is used for creating a new assembly in
    ## cwd, file is used for loading an existing one.
    project_dir = ip.core.assembly.expander(parsedict['1'])
    assembly_name = parsedict['0']
    assembly_file = os.path.join(project_dir, assembly_name)

    ## Assembly creation will handle error checking on
    ## the format of the assembly_name

    ## Make sure the working directory exists. makedirs (rather than
    ## mkdir) so that a nested project_dir whose parent is missing is
    ## created instead of raising OSError.
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    try:
        ## If step 1 and force then go ahead and create a new assembly
        if '1' in args.steps and args.force:
            data = ip.Assembly(assembly_name)
        else:
            data = ip.load_json(assembly_file)

    except IPyradWarningExit:
        ## No usable assembly: only step 1 is allowed to start from scratch.
        if '1' not in args.steps:
            raise IPyradWarningExit(""" Error: Steps >1 ({}) requested but no current assembly found - {} """.format(args.steps, assembly_file))
        ## create a new assembly object
        data = ip.Assembly(assembly_name)

    ## for entering some params...
    for param in parsedict:

        ## trap assignment of assembly_name (key '0') since it is immutable.
        if param == str(0):
            ## only pass to set_params if user tried to change assembly_name
            ## it will raise an Exit error
            if parsedict[param] != data.name:
                data.set_params(param, parsedict[param])
        else:
            ## all other params should be handled by set_params
            data.set_params(param, parsedict[param])

    return data
def get_assembly(self):
    """
    Load the Assembly named in the params, or create a new one, update
    it from ``self.parsedict`` and store it on ``self.data``.
    Does not launch ipcluster.

    A new Assembly is created only when step '1' is among
    ``self.args.steps``; in every other case the JSON file is loaded.

    Raises
    ------
    IPyradError
        If step 1 is requested without force and the JSON file already
        exists.
    """
    # Be nice if the user includes the extension.
    project_dir = self.parsedict['project_dir']
    assembly_name = self.parsedict['assembly_name']
    json_file = os.path.join(project_dir, assembly_name)
    if not json_file.endswith(".json"):
        json_file += ".json"

    # Make sure the working directory exists. makedirs (rather than
    # mkdir) so a nested project_dir with a missing parent is created
    # instead of raising OSError.
    if not os.path.exists(project_dir):
        os.makedirs(project_dir)

    # Create new Assembly instead of loading if step 1 was requested.
    steps = self.args.steps
    if steps and '1' in steps:
        # Refuse to clobber an existing assembly unless forced.
        if not self.args.force and os.path.exists(json_file):
            raise IPyradError(
                "Assembly already exists, use force to overwrite")
        data = ip.Assembly(assembly_name, cli=True)
    else:
        data = ip.load_json(json_file, cli=True)

    # Update json assembly with params in paramsfile in case they changed.
    # assembly_name is deliberately never passed to set_params.
    for key, param in self.parsedict.items():
        if key != "assembly_name":
            data.set_params(key, param)

    # store it.
    self.data = data
def showstats(parsedict):
    """
    Load the Assembly described by parsedict and print its stats.

    Reads 'project_dir' (an empty value falls back to "./") and
    'assembly_name' from parsedict. Exits through sys.exit with a message
    if the project directory does not exist, and raises IPyradError if
    the assembly name is empty. Otherwise prints the summary stats
    followed by the stats-file path recorded for each of steps 1-7, or
    "No stats to display" when the stats table is empty.
    """
    ## an unset project dir means the current directory
    project_dir = parsedict["project_dir"] or "./"

    ## Be nice if somebody also puts in the file extension
    assembly_name = parsedict["assembly_name"]
    my_assembly = os.path.join(project_dir, assembly_name)

    ## If the project_dir doesn't exist don't even bother trying harder.
    if not os.path.isdir(project_dir):
        sys.exit(""" Trying to print stats for Assembly ({}) that doesn't exist. You must first run steps before you can show results. """.format(project_dir))

    if not assembly_name:
        raise IPyradError(""" Assembly name is not set in params.txt, meaning it was either changed or erased since the Assembly was started. Please restore the original name. You can find the name of your Assembly in the "project dir": {}. """.format(project_dir))

    data = ip.load_json(my_assembly, quiet=True, cli=True)

    rule = "\n------------------------------------------------"
    print("\nSummary stats of Assembly {}".format(data.name) + rule)

    ## nothing more to report for an assembly without stats
    if data.stats.empty:
        print("No stats to display")
        return

    print(data.stats)
    print("\n\nFull stats files" + rule)

    ## show stats-file paths relative to the current directory
    fullcurdir = os.path.realpath(os.path.curdir)
    for step in range(1, 8):
        key = "s{}".format(step)
        try:
            line = "step {}: {}".format(
                step, data.stats_files[key].replace(fullcurdir, "."))
        except (KeyError, AttributeError):
            ## no usable entry recorded for this step
            line = "step {}: None".format(step)
        print(line)
    print("\n")
def showstats(parsedict):
    """
    Load the Assembly described by parsedict and print its stats.

    Parameters
    ----------
    parsedict : dict
        Key '1' holds the project directory (an empty value falls back
        to "./") and key '0' the assembly name.

    Raises
    ------
    SystemExit
        With an explanatory message if the project directory does not
        exist.
    IPyradError
        If the assembly name is empty.

    Otherwise prints the summary stats followed by the stats-file path
    recorded for each of steps 1-7, or "No stats to display" when the
    stats table is empty.
    """
    project_dir = parsedict['1']
    ## An unset project dir means the current directory. Without this
    ## default, os.path.isdir("") is False and the function would exit
    ## claiming the directory does not exist.
    if not project_dir:
        project_dir = "./"

    ## Be nice if somebody also puts in the file extension
    assembly_name = parsedict['0']
    my_assembly = os.path.join(project_dir, assembly_name)

    ## If the project_dir doesn't exist don't even bother trying harder.
    if not os.path.isdir(project_dir):
        msg = """ Trying to print stats for Assembly ({}) that doesn't exist. You must first run steps before you can show results. """.format(project_dir)
        sys.exit(msg)

    if not assembly_name:
        msg = """ Assembly name is not set in params.txt, meaning it was either changed or erased since the Assembly was started. Please restore the original name. You can find the name of your Assembly in the "project dir": {}. """.format(project_dir)
        raise IPyradError(msg)

    data = ip.load_json(my_assembly, quiet=True)

    print("\nSummary stats of Assembly {}".format(data.name) \
         +"\n------------------------------------------------")

    if not data.stats.empty:
        print(data.stats)
        print("\n\nFull stats files"\
             +"\n------------------------------------------------")

        ## show stats-file paths relative to the current directory
        fullcurdir = os.path.realpath(os.path.curdir)
        for i in range(1, 8):
            key = "s"+str(i)
            try:
                val = data.stats_files[key]
                val = val.replace(fullcurdir, ".")
                print("step {}: {}".format(i, val))
            except (KeyError, AttributeError):
                ## no usable entry recorded for this step
                print("step {}: None".format(i))
        print("\n")
    else:
        print("No stats to display")
def show_stats(self):
    """
    Load the Assembly named in ``self.parsedict`` and print its stats.

    Raises IPyradError if the assembly's JSON file cannot be found.
    Otherwise prints the summary stats followed by the stats-file path
    recorded for each of steps 1-7, or "No stats to display" when the
    stats table is empty.
    """
    # Be nice if the user includes the extension.
    project_dir = self.parsedict['project_dir']
    assembly_name = self.parsedict['assembly_name']
    json_file = os.path.join(project_dir, assembly_name)
    if not json_file.endswith(".json"):
        json_file = json_file + ".json"

    # Nothing to show without a saved assembly.
    if not os.path.exists(json_file):
        raise IPyradError(
            "Cannot find assembly {}".format(json_file))

    # load the assembly
    data = ip.load_json(json_file, quiet=True, cli=True)

    rule = "\n------------------------------------------------"
    print("\nSummary stats of Assembly {}".format(data.name) + rule)

    # An assembly without stats gets a short notice only.
    if data.stats.empty:
        print("No stats to display")
        return

    print(data.stats)
    print("\n\nFull stats files" + rule)

    # Show stats-file paths relative to the current directory.
    fullcurdir = os.path.realpath(os.path.curdir)
    for step in range(1, 8):
        key = "s{}".format(step)
        try:
            line = "step {}: {}".format(
                step, data.stats_files[key].replace(fullcurdir, "."))
        except (KeyError, AttributeError):
            # no usable entry recorded for this step
            line = "step {}: None".format(step)
        print(line)
    print("\n")
## /filters -- filled for dups, left empty for others until step 7. ## .attr['filters'] = [f1, f2, f3, f4, f5] ## /seqs -- contains the clustered sequence data as string arrays ## .attr['samples'] = [samples] ## /edges -- gets the paired split locations for now. ## /snps -- left empty for now ## calls singlecat func inside LOGGER.info("building full database") build_h5_array(data, samples, ipyclient) # except Exception as inst: # LOGGER.error(inst) # raise IPyradWarningExit(inst) finally: ## delete the tmpdir shutil.rmtree(data.tmpdir) if __name__ == "__main__": ## get path to test dir/ ROOT = os.path.realpath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) ## load test data (pairgbs) DATA = ip.load_json("/home/deren/Documents/RADmissing/rad1/half_min4.json") # ## run step 6 DATA.run("6", force=True)
if __name__ == "__main__":

    ## Path three directory levels above this file. It is not used by
    ## the run below; earlier ad-hoc runs (now removed) loaded
    ## assemblies from ROOT/tests/{Ron,test_pairgbs,test_rad} and called
    ## step6(force=True) on them.
    ROOT = os.path.dirname(__file__)
    ROOT = os.path.dirname(os.path.dirname(ROOT))
    ROOT = os.path.realpath(ROOT)

    ## load test data (pairgbs) from a developer-local absolute path
    DATA = ip.load_json(
        "/home/deren/Documents/RADmissing/rad1/half_min4.json")

    ## run step 6
    DATA.step6(force=True)
if not bidx: progressbar(njobs, finished) print("") ## convert to txt file for wQMC dump(data) ## run quartet joining algorithm if not bidx: run_qmc(data, boot=0) else: run_qmc(data, boot=1) ## reset the checkpoint_arr data.svd.checkpoint_arr = 0 if __name__ == "__main__": ## imports import ipyrad.analysis as ipa #import ipyrad as ip #import ipyparallel as ipp #DATA = ipyrad.load_json("~/Documents/ipyrad/tests/cli/cli.json") DATA = ipyrad.load_json("~/Documents/ipyrad/tests/iptutorial/cli.json") ## run ipa.svd4tet.wrapper(DATA, nboots=10, method='equal', nquarts=50, force=True)
## wrap job in try/finally to ensure cleanup try: apply_jobs(*args) except Exception as inst: LOGGER.warn(inst) raise finally: alignment_cleanup(data) if __name__ == "__main__": ## test... ## reload autosaved data. In case you quit and came back DATA = ipyrad.load_json("cli/cli.json") ## run step 6 DATA.step3(force=True) # DATA = Assembly("test") # DATA.get_params() # DATA.set_params(1, "./") # DATA.set_params(28, '/Volumes/WorkDrive/ipyrad/refhacking/MusChr1.fa') # DATA.get_params() # print(DATA.log) # DATA.step3() #PARAMS = {} #FASTQS = [] #QUIET = 0 #run(PARAMS, FASTQS, QUIET)