Ejemplo n.º 1
0
def main():
    """Command-line entry point for the svd4tet analysis tool.

    Prints the banner, parses the command line and obtains an Assembly:
    either loaded from the JSON file in ``args.json`` or created from the
    output prefix in ``args.output`` and the sequence file in ``args.seq``.
    It then records the ipcluster settings, calls ``ipcontroller_init`` and
    runs ``ipa.svd4tet.run`` through ``data._clientwrapper``.

    Raises:
        IPyradWarningExit: if no JSON file is given and either the output
            prefix or the sequence file is missing.
    """
    ## not in ipython
    ip.__interactive__ = 0

    header = (
        "\n --------------------------------------------------" +
        "\n  Analysis tools for ipyrad [v.{}]".format(ip.__version__) +
        "\n  svd4tet -- fast quartet and tree inference " +
        "\n --------------------------------------------------"
    )
    print(header)

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## if JSON, load it
    if args.json:
        data = ip.load_json(args.json)
        ## NOTE(review): this assigns the attribute to itself; it is kept
        ## because attribute access on `outfiles` may have side effects
        ## (e.g. raising when `svdinput` is missing) -- confirm and remove.
        data.outfiles.svdinput = data.outfiles.svdinput

    ## else create a tmp assembly for the seqarray
    else:
        if not args.output:
            raise IPyradWarningExit("  -o output_prefix required")
        if not args.seq:
            raise IPyradWarningExit("  -s sequence file required")
        ## create new JSON (Assembly) object
        data = ip.Assembly(args.output, quiet=True)
        data.outfiles.svdinput = args.seq
        data.set_params(1, "./")

        ## parse samples from the sequence file: the first line is skipped
        ## and the first whitespace-delimited token of every following
        ## non-blank line is taken as a sample name.
        with open(args.seq, 'r') as infile:
            ## the builtin next() works on Python 2.6+ and 3; the default
            ## keeps an empty file from leaking StopIteration
            next(infile, None)
            names = [line.split()[0] for line in infile if line.strip()]

        ## store as Samples in Assembly
        data.samples = {name: ip.Sample(name) for name in names}

    ## store ipcluster info
    data._ipcluster["cores"] = args.cores

    if args.MPI:
        data._ipcluster["engines"] = "MPI"
    else:
        data._ipcluster["engines"] = "Local"

    ## launch ipcluster and register for later destruction
    data = ipcontroller_init(data)

    ## run svd4tet; a separate name keeps the parsed `args` namespace intact
    ## NOTE(review): the meaning of the trailing 45 is not visible here
    ## (presumably a timeout in seconds) -- confirm against _clientwrapper.
    run_args = [data, args.boots, args.method, args.nquartets, args.force]
    data._clientwrapper(ipa.svd4tet.run, run_args, 45)
Ejemplo n.º 2
0
def load_assembly(name, controller="Local", quiet=False, launch=False):
    """Load a dill-pickled Assembly object from disk.

    Two candidate paths are tried: ``name + ".assembly"`` and ``name``
    itself. Loading stops at the first candidate that is processed without
    error, so the assembly is announced, updated and (optionally) given an
    ipcluster only once.

    Args:
        name: path of the saved Assembly, with or without the ``.assembly``
            suffix.
        controller: passed to ``ipcontroller_init`` when ``launch`` is true.
        quiet: if true, the "loading Assembly" line is not printed; it is
            also passed to ``ipcontroller_init``.
        launch: if true, ``ipcontroller_init`` is called and its result is
            stored in ``data._ipclusterid``; otherwise that attribute is set
            to an empty string.

    Returns:
        The loaded Assembly object (updated via ``update_assembly`` when
        ``test_assembly`` reports it needs updating).

    Raises:
        AssertionError: if nothing could be loaded from either candidate.
    """
    ## flexible name entry. The suffixed path is tried first so that it takes
    ## precedence when both files exist -- the same object that was returned
    ## when every candidate was loaded in turn and the last one won.
    candidates = [name + ".assembly", name]

    data = None
    for path in candidates:
        try:
            ## load in the Assembly object
            ## NOTE: unpickling can execute arbitrary code; only load files
            ## from a trusted source.
            with open(path, "rb") as pickin:
                data = dill.load(pickin)

            ## will raise AttributeError if the object is not an Assembly
            fullcurdir = os.path.realpath(os.path.curdir)
            shown = path.replace(fullcurdir, ".")
            if not quiet:
                print("  loading Assembly: {} [{}]".\
                      format(data.name, shown))

            ## Test if our assembly is currently up to date and, if it is
            ## not, update the loaded assembly to the current version.
            if test_assembly(data):
                print("  Attempting to update assembly to newest version.")
                data = update_assembly(data)

            ## relaunch ipcluster
            if launch:
                data._ipclusterid = ipcontroller_init(nproc="",
                                                      controller=controller,
                                                      quiet=quiet)
            else:
                data._ipclusterid = ""

        except (IOError, AttributeError):
            ## NOTE(review): an AttributeError raised after the file was
            ## unpickled leaves `data` bound, as before; the next candidate
            ## is still tried.
            continue

        ## first fully processed candidate wins
        break

    if data is None:
        ## report the name the caller asked for, not an internal candidate
        raise AssertionError(
            "Attempting to load assembly. File not found: {}".format(name))
    return data
Ejemplo n.º 3
0
    def __init__(self, name, controller="Local"):
        """Create a new Assembly called `name`.

        Announces the object on stdout, calls ``ipcontroller_init`` with
        `controller`, stores the dependency binaries returned by
        ``getbins()``, sets up the log and the empty containers, and fills
        ``self.paramsdict`` with the default parameter values.
        """
        ## obj name
        self.name = name
        print("New Assembly object `{}` created".format(self.name))

        ## launch ipcluster and register for later destruction
        self.__ipname__ = ipcontroller_init(controller)

        ## get binaries of dependencies
        binaries = getbins()
        self.vsearch, self.muscle, self.smalt, self.samtools = binaries

        ## link a log history of executed workflow
        self.log = []
        self._stamp(self.name + " created")
        self.statsfiles = ObjDict()

        ## samples linked, multiplex files linked, and an object for storing
        ## data directories for this Assembly
        self.samples = ObjDict()
        self.barcodes = ObjDict()
        self.dirs = ObjDict()

        ## absolute path of the current directory, shared by the path defaults
        cwd = os.path.realpath(os.path.curdir)

        ## the default params dict, one (key, value) pair per line
        defaults = [
            ("working_directory", cwd),
            ("raw_fastq_path", os.path.join(cwd, "*.fastq")),
            ("barcodes_path", os.path.join(cwd, "*.barcodes.txt")),
            ("sorted_fastq_path", ""),
            ("restriction_overhang", ("TGCAG", "")),
            ("max_low_qual_bases", 5),
            ("engines_per_job", 4),
            ("mindepth_statistical", 6),
            ("mindepth_majrule", 6),
            ("datatype", 'rad'),
            ("clust_threshold", .85),
            ("minsamp", 4),
            ("max_shared_heterozygosity", .25),
            ("prefix_outname", self.name),
            ("phred_Qscore_offset", 33),
            ("max_barcode_mismatch", 1),
            ("filter_adapters", 0),
            ("filter_min_trim_len", 35),
            ("ploidy", 2),
            ("max_stack_size", 1000),
            ("max_Ns_consens", (5, 5)),
            ("max_Hs_consens", (8, 8)),
            ("max_SNPs_locus", (100, 100)),
            ("max_Indels_locus", (5, 99)),
            ("trim_overhang", (1, 2, 2, 1)),
            ("hierarchical_clustering", 0),
            ("assembly_method", "denovo"),
            ("reference_sequence", ""),
        ]
        self.paramsdict = OrderedDict(defaults)
Ejemplo n.º 4
0
def load_assembly(name, controller="Local"):
    """Load a dill-pickled Assembly object and relaunch its ipcluster.

    ``.assembly`` is appended to `name` unless that text already occurs
    somewhere in it. If the resulting path does not exist a message is
    printed and ``None`` is returned; otherwise the object is unpickled, the
    result of ``ipcontroller_init(controller)`` is stored on it as
    ``__ipname__`` and the object is returned.
    """
    ## flexible name entry
    path = name if ".assembly" in name else name + ".assembly"

    ## does Assembly save obj exist?
    if os.path.exists(path):
        ## load in the Assembly object
        with open(path, "rb") as pickin:
            assembly = dill.load(pickin)

        ## relaunch ipcluster
        assembly.__ipname__ = ipcontroller_init(controller)
        return assembly

    ## nothing to load: tell the user and fall through (returns None)
    print("cannot find", path, "try entering the full path to file.")
Ejemplo n.º 5
0
    def __init__(self, name, controller="Local"):
        """Initialise an Assembly object named `name`.

        Prints a creation message, stores the value returned by
        ``ipcontroller_init(controller)``, records the four binaries
        returned by ``getbins()``, starts the log, creates the empty
        ``ObjDict`` containers and builds the ordered default parameters.
        """
        ## obj name
        self.name = name
        print("New Assembly object `{}` created".format(self.name))

        ## launch ipcluster and register for later destruction
        self.__ipname__ = ipcontroller_init(controller)

        ## get binaries of dependencies
        (self.vsearch,
         self.muscle,
         self.smalt,
         self.samtools) = getbins()

        ## link a log history of executed workflow
        self.log = []
        self._stamp(self.name+" created")
        self.statsfiles = ObjDict()

        ## samples linked
        self.samples = ObjDict()

        ## multiplex files linked
        self.barcodes = ObjDict()

        ## an object for storing data directories for this Assembly
        self.dirs = ObjDict()

        ## the default params dict, filled key by key so that insertion
        ## order defines the parameter order
        here = os.path.realpath(os.path.curdir)
        params = OrderedDict()
        params["working_directory"] = here
        params["raw_fastq_path"] = os.path.join(here, "*.fastq")
        params["barcodes_path"] = os.path.join(here, "*.barcodes.txt")
        params["sorted_fastq_path"] = ""
        params["restriction_overhang"] = ("TGCAG", "")
        params["max_low_qual_bases"] = 5
        params["engines_per_job"] = 4
        params["mindepth_statistical"] = 6
        params["mindepth_majrule"] = 6
        params["datatype"] = 'rad'
        params["clust_threshold"] = .85
        params["minsamp"] = 4
        params["max_shared_heterozygosity"] = .25
        params["prefix_outname"] = self.name
        params["phred_Qscore_offset"] = 33
        params["max_barcode_mismatch"] = 1
        params["filter_adapters"] = 0
        params["filter_min_trim_len"] = 35
        params["ploidy"] = 2
        params["max_stack_size"] = 1000
        params["max_Ns_consens"] = (5, 5)
        params["max_Hs_consens"] = (8, 8)
        params["max_SNPs_locus"] = (100, 100)
        params["max_Indels_locus"] = (5, 99)
        params["trim_overhang"] = (1, 2, 2, 1)
        params["hierarchical_clustering"] = 0
        params["assembly_method"] = "denovo"
        params["reference_sequence"] = ""
        self.paramsdict = params
Ejemplo n.º 6
0
def main():
    """Command-line entry point.

    With ``args.new`` a default ``params.txt`` is written through a
    temporary Assembly and the process exits with status 2 (also on
    failure). Otherwise, when a params file is given, either the stats are
    shown (``args.results``) or the requested steps are run on an Assembly
    obtained from ``getassembly`` after an ipcluster has been initialised.
    """
    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## create new paramsfile if -n
    if args.new:
        ## Create a tmp assembly and call write_params to write out
        ## default params.txt file
        try:
            scratch = ip.core.assembly.Assembly("")
            scratch.write_params("params.txt", force=args.force)
        except Exception as err:
            print(err)
            print("\nUse --force to overwrite\n")
            sys.exit(2)

        here = os.path.realpath(os.path.curdir)
        print("New file `params.txt` created in {}".format(here))
        sys.exit(2)

    ## if showing results, do not do any steps and do not print header
    if args.results:
        args.steps = ""
        print("")
    else:
        banner = "".join([
            "\n --------------------------------------------------",
            "\n  ipyrad [v.{}]".format(ip.__version__),
            "\n  Interactive assembly and analysis of RADseq data",
            "\n --------------------------------------------------",
        ])
        print(banner)

    ## nothing more to do without a params file
    if not args.params:
        return

    ## create new Assembly or load existing Assembly, quit if args.results
    parsedict = parse_params(args)
    if args.results:
        showstats(parsedict)
        return

    ## run Assembly steps, if any were requested
    if not args.steps:
        return

    ## choose the ipcluster controller (only MPI and Local are wired up)
    controller = "MPI" if args.MPI else "Local"

    ## launch or load assembly with custom profile/pid
    data = getassembly(args, parsedict)

    ## launch ipcluster and register for later destruction
    data._ipclusterid = ipcontroller_init(nproc=args.cores,
                                          controller=controller)

    ## set to print headers
    data._headers = 1

    ## run assembly steps, one list entry per character of args.steps
    data.run(steps=list(args.steps), force=args.force,
             preview=args.preview)
Ejemplo n.º 7
0
def main():
    """Command-line entry point.

    Handles, in this order: creating a new params file (``args.new``),
    rejecting a params file given without an action, printing parameter
    info (``args.info``), and -- for a params file -- branching, running
    steps and/or showing results.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## create new paramsfile if -n
    if args.new:
        ## Create a tmp assembly and call write_params to write out
        ## default params.txt file
        try:
            scratch = ip.Assembly(args.new, quiet=True)
            outname = "params-{}.txt".format(args.new)
            scratch.write_params(outname, force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        created_in = os.path.realpath(os.path.curdir)
        print("\n    New file `params-{}.txt` created in {}\n".format(
            args.new, created_in))
        sys.exit(2)

    ## if params then must provide action argument with it
    if args.params:
        actions = [args.branch, args.results, args.steps]
        if not any(actions):
            print("""
    Must provide action argument along with -p argument for params file. 
    e.g., ipyrad -p params-test.txt -r      ## shows results
    e.g., ipyrad -p params-test.txt -s 12   ## runs steps 1 & 2
    """)
            sys.exit(2)

    ## if branching or info do not allow steps in same command, print spacer
    if any([args.branch, args.info]):
        args.steps = ""
        print("")

    ## the header is built here but only printed when doing steps
    header = "".join([
        "\n --------------------------------------------------",
        "\n  ipyrad [v.{}]".format(ip.__version__),
        "\n  Interactive assembly and analysis of RADseq data",
        "\n --------------------------------------------------",
    ])

    ## if info print the info and exit
    if not args.info == False:
        if args.info:
            ip.paramsinfo(int(args.info))
        else:
            ip.paramsinfo()
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            ## print header
            print(header)

            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## if cores was entered, store it on the assembly
            ## NOTE(review): the value is written to `data.cpus`, while the
            ## engine type goes to `data._ipcluster` -- confirm this is the
            ## attribute the cluster setup reads.
            if args.cores:
                data.cpus = args.cores

            data._ipcluster["engines"] = "MPI" if args.MPI else "Local"

            ## launch ipcluster and register for later destruction
            data = ipcontroller_init(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps, one list entry per character
            data.run(steps=list(args.steps), force=args.force,
                     preview=args.preview)

        if args.results:
            showstats(parsedict)
Ejemplo n.º 8
0
def main():
    """Run the ipyrad command-line interface.

    Steps, in order: optionally write a new ``params-<name>.txt`` and exit;
    require an action (branch, results or steps) whenever a params file is
    given; print parameter info and exit when requested; otherwise parse
    the params file and branch, run the assembly steps, and/or show stats.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## create new paramsfile if -n
    if args.new:
        ## a temporary assembly writes out the default params file
        try:
            tmp = ip.Assembly(args.new, quiet=True)
            tmp.write_params(
                "params-{}.txt".format(args.new), force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        message = "\n    New file `params-{}.txt` created in {}\n"
        print(message.format(args.new, os.path.realpath(os.path.curdir)))
        sys.exit(2)

    ## if params then must provide action argument with it
    if args.params and not any([args.branch, args.results, args.steps]):
        print("""
    Must provide action argument along with -p argument for params file. 
    e.g., ipyrad -p params-test.txt -r      ## shows results
    e.g., ipyrad -p params-test.txt -s 12   ## runs steps 1 & 2
    """)
        sys.exit(2)

    ## if branching or info do not allow steps in same command, print spacer
    if any([args.branch, args.info]):
        args.steps = ""
        print("")

    ## header text; it is only printed when steps are run
    rule = "\n --------------------------------------------------"
    header = (
        rule
        + "\n  ipyrad [v.{}]".format(ip.__version__)
        + "\n  Interactive assembly and analysis of RADseq data"
        + rule
    )

    ## if info print the info and exit
    if not args.info == False:
        if args.info:
            ip.paramsinfo(int(args.info))
        else:
            ip.paramsinfo()
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            print(header)

            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## if cores was entered, store the requested number
            if args.cores:
                data.cpus = args.cores

            ## pick the engine type from the MPI flag
            engines = "Local"
            if args.MPI:
                engines = "MPI"
            data._ipcluster["engines"] = engines

            ## launch ipcluster and register for later destruction
            data = ipcontroller_init(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps
            requested = list(args.steps)
            data.run(steps=requested, force=args.force,
                     preview=args.preview)

        if args.results:
            showstats(parsedict)