Beispiel #1
0
def getassembly(args, parsedict):
    """
    Return the Assembly for this run, configured from the params file.

    A saved Assembly is loaded from <project_dir>/<assembly_name> unless
    step 1 is requested together with force. If loading (or creation)
    raises IPyradWarningExit and step 1 was requested, a fresh Assembly is
    created instead; otherwise the error is re-raised with a hint to run
    step 1 first. Does not launch ipcluster.

    Parameters
    ----------
    args : parsed command line; `steps`, `force` and `params` are read.
    parsedict : dict mapping parameter names to values from the params file.

    Returns
    -------
    The loaded or newly created Assembly with every entry of parsedict
    applied through set_params.
    """
    ## The bare name is used to create a new Assembly, the joined path is
    ## used to load an existing one. Assembly creation itself validates
    ## the format of the name.
    workdir = ip.core.assembly._expander(parsedict['project_dir'])
    name = parsedict['assembly_name']
    saved_path = os.path.join(workdir, name)

    ## make sure the working directory exists.
    if not os.path.exists(workdir):
        os.mkdir(workdir)

    try:
        ## step 1 plus force means start over with a new assembly
        if ('1' in args.steps) and args.force:
            data = ip.Assembly(name, cli=True)
        else:
            data = ip.load_json(saved_path, cli=True)
            data._cli = True

    except IPyradWarningExit:
        ## nothing usable on disk: only step 1 is allowed to start fresh
        if '1' not in args.steps:
            raise IPyradWarningExit(
                "  Error: You must first run step 1 on the assembly: {}"
                .format(saved_path))
        data = ip.Assembly(name, cli=True)

    ## apply the params file values to the Assembly
    for key in parsedict:

        if key == "assembly_name":
            ## the name is immutable; it is only handed to set_params when
            ## it differs from the Assembly's own name (presumably so that
            ## set_params can reject the change -- not visible here).
            if parsedict[key] != data.name:
                data.set_params(key, parsedict[key])
            continue

        ## all other params are handled by set_params
        try:
            data.set_params(key, parsedict[key])
        except IndexError:
            print("  Malformed params file: {}".format(args.params))
            print("  Bad parameter {} - {}".format(key, parsedict[key]))
            sys.exit(-1)

    return data
Beispiel #2
0
def getassembly(args, parsedict):
    """
    Return the Assembly for this run, configured from the params file.

    parsedict is keyed by parameter number as a string: '0' is the
    assembly name and '1' the project directory. A saved Assembly is
    loaded from <project_dir>/<assembly_name> unless step 1 is requested
    together with force. If that raises IPyradWarningExit and step 1 was
    requested, a fresh Assembly is created; otherwise the error is
    re-raised with an explanatory message. Does not launch ipcluster.
    """
    ## The bare name is used to create a new Assembly, the joined path is
    ## used to load an existing one. Assembly creation itself validates
    ## the format of the name.
    workdir = ip.core.assembly.expander(parsedict['1'])
    name = parsedict['0']
    saved_path = os.path.join(workdir, name)

    ## make sure the working directory exists.
    if not os.path.exists(workdir):
        os.mkdir(workdir)

    try:
        ## step 1 plus force means start over with a new assembly
        if '1' in args.steps and args.force:
            data = ip.Assembly(name)
        else:
            data = ip.load_json(saved_path)

    except IPyradWarningExit:
        ## nothing usable on disk: only step 1 is allowed to start fresh
        if '1' not in args.steps:
            raise IPyradWarningExit("""
    Error: Steps >1 ({}) requested but no current assembly found - {}
    """.format(args.steps, saved_path))
        data = ip.Assembly(name)

    ## apply the params file values to the Assembly
    for key in parsedict:
        ## param '0' is the (immutable) assembly name: it is only handed to
        ## set_params when the user tried to change it.
        if key == str(0) and parsedict[key] == data.name:
            continue
        data.set_params(key, parsedict[key])

    return data
Beispiel #3
0
def parse_params(params):
    """
    Parse a params file and return an Assembly configured from it.

    Parameters
    ----------
    params : str
        Path to the params file. The first line is a header; each of the
        following lines holds one value, optionally followed by a
        "##" comment.

    Returns
    -------
    An Assembly created with entry '14' as its name and with every parsed
    entry ('1' .. '29') applied through set_params.

    Raises
    ------
    AssertionError
        If the first token of the header is not 6 characters long (this
        includes an empty file or a blank first line), or if the file has
        30 or fewer lines.
    """
    with open(params) as paramsin:
        plines = paramsin.readlines()

    ## check header: big version changes can be distinguished by the header.
    ## Raised explicitly (rather than with `assert`) so the check survives
    ## `python -O`; an empty/blank header is reported the same way instead
    ## of surfacing as a bare IndexError.
    header = plines[0].split() if plines else []
    if not header or len(header[0]) != 6:
        raise AssertionError(
            "params file is not compatible with ipyrad v.{}. "
            .format(ip.__version__)
            + "Create a new params file with: ipyrad -n")

    ## check length
    if len(plines) <= 30:
        raise AssertionError("params file error. Format not recognized.")

    ## make into a dict keyed by the 1-based line position as a string
    items = [i.split("##")[0].strip() for i in plines[1:]]
    parsedict = {str(i): j for i, j in zip(range(1, 30), items)}

    ## create a default Assembly object; datatype is set first because
    ## other params may depend on it (e.g., to know if data are paired).
    data = ip.Assembly(parsedict['14'])
    data.set_params("datatype", parsedict['10'])

    ## set_params for all keys in parsedict.
    for param in parsedict:
        data.set_params(param, parsedict[param])

    return data
Beispiel #4
0
def test_assembly(data):
    """
    Report whether a loaded assembly lacks keys of the current version.

    A temporary Assembly with the same name is created and its paramsdict
    and _hackersonly keys are compared against those of `data`.

    Returns True if the new Assembly has at least one paramsdict or
    _hackersonly key that `data` does not have, otherwise False. Keys that
    exist only in `data` are not considered.
    """
    reference = ip.Assembly(data.name, quiet=True)

    ## params known to the current version but absent from the loaded one
    current_params = set(reference.paramsdict.keys())
    loaded_params = set(data.paramsdict.keys())
    missing_params = current_params - loaded_params

    ## same comparison for the hackersonly dict
    loaded_hackers = set(data._hackersonly.keys())
    current_hackers = set(reference._hackersonly.keys())
    missing_hackers = current_hackers - loaded_hackers

    return bool(missing_params or missing_hackers)
Beispiel #5
0
def main():
    """
    Command-line entry point for the svd4tet analysis tool.

    Prints the banner, parses the command line, then either loads an
    existing Assembly from JSON (args.json) or builds a temporary one
    around a sequence file (args.seq, named by args.output). Finally it
    records the ipcluster settings, starts the cluster through
    ipcontroller_init and runs ipa.svd4tet.run via the Assembly's
    _clientwrapper.

    Raises
    ------
    IPyradWarningExit
        If no JSON is given and the output prefix or the sequence file
        argument is missing.
    """
    ## not in ipython
    ip.__interactive__ = 0

    header = \
    "\n --------------------------------------------------"+\
    "\n  Analysis tools for ipyrad [v.{}]".format(ip.__version__)+\
    "\n  svd4tet -- fast quartet and tree inference "+\
    "\n --------------------------------------------------"
    print(header)

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## if JSON, load it
    if args.json:
        data = ip.load_json(args.json)
        ## NOTE(review): self-assignment kept from the original; it looks
        ## like a no-op unless attribute access on outfiles has side
        ## effects -- confirm and remove.
        data.outfiles.svdinput = data.outfiles.svdinput

    ## else create a tmp assembly for the seqarray
    else:
        if not args.output:
            raise IPyradWarningExit("  -o output_prefix required")
        if not args.seq:
            raise IPyradWarningExit("  -s sequence file required")
        ## create new JSON (Assembly) object
        data = ip.Assembly(args.output, quiet=True)
        data.outfiles.svdinput = args.seq
        data.set_params(1, "./")

        ## parse sample names from the sequence file: the first line is a
        ## header and is skipped, the first field of every other line is
        ## the name. Blank lines are ignored and an empty file yields no
        ## names instead of raising.
        names = []
        with open(args.seq, 'r') as infile:
            next(infile, None)
            for line in infile:
                fields = line.split()
                if fields:
                    names.append(fields[0])

        ## store as Samples in Assembly
        data.samples = {name: ip.Sample(name) for name in names}

    ## store ipcluster info
    data._ipcluster["cores"] = args.cores

    if args.MPI:
        data._ipcluster["engines"] = "MPI"
    else:
        data._ipcluster["engines"] = "Local"

    ## launch ipcluster and register for later destruction
    data = ipcontroller_init(data)

    ## run svd4tet (kept separate from `args` so the parsed command line
    ## is not shadowed by the argument list)
    run_args = [data, args.boots, args.method, args.nquartets, args.force]
    data._clientwrapper(ipa.svd4tet.run, run_args, 45)
def getassembly(args, parsedict):
    """
    Return the Assembly for this run, configured from the params file.

    parsedict['1'] is the working directory; its last path component is
    also used as the assembly name. The directory is created if missing
    and made the current directory. Unless force and step 1 are both
    requested, an existing Assembly is loaded from
    <working_directory>/<assembly_name>; when that raises AssertionError a
    new Assembly is created instead. Does not launch ipcluster.

    Every non-empty entry of parsedict is then applied with set_params;
    any failure is printed and re-raised.
    """
    workdir = parsedict['1']
    name = os.path.split(parsedict['1'])[1]
    saved_path = os.path.join(workdir, name)

    ## make sure the working directory exists, then work from inside it.
    if not os.path.exists(workdir):
        os.mkdir(workdir)
    os.chdir(workdir)

    if args.force and '1' in args.steps:
        ## forcing step 1: never load an existing Assembly
        data = ip.Assembly(name)
    else:
        try:
            data = ip.load.load_assembly(saved_path, launch=False)
        except AssertionError:
            ## not found, start a new one
            LOGGER.info("No current assembly found, create new - {}".\
                        format(saved_path))
            data = ip.Assembly(name)

    ## apply the params file values, skipping empty ones
    for key in parsedict:
        if not parsedict[key]:
            continue
        try:
            data.set_params(key, parsedict[key])
        except Exception as inst:
            print(inst)
            print("Bad parameter in the params file - param {} value {}".\
                  format(key, parsedict[key]))
            raise

    return data
Beispiel #7
0
    def get_assembly(self):
        """
        Load or create the Assembly for this run and store it on self.data.

        A new Assembly is created only when step 1 is among the requested
        steps; without force this fails with IPyradError if the JSON file
        already exists. In every other case the Assembly is loaded from
        <project_dir>/<assembly_name>.json. All params from self.parsedict
        except assembly_name are then applied with set_params.
        Does not launch ipcluster.
        """
        workdir = self.parsedict['project_dir']
        name = self.parsedict['assembly_name']

        # Be nice if the user includes the extension.
        json_file = os.path.join(workdir, name)
        if not json_file.endswith(".json"):
            json_file += ".json"

        # make sure the working directory exists.
        if not os.path.exists(workdir):
            os.mkdir(workdir)

        steps = self.args.steps
        if steps and '1' in steps:
            # starting a new assembly; refuse to clobber without force
            if not self.args.force and os.path.exists(json_file):
                raise IPyradError(
                    "Assembly already exists, use force to overwrite")
            data = ip.Assembly(name, cli=True)
        else:
            # no steps, or steps that do not include 1: continue from disk
            data = ip.load_json(json_file, cli=True)

        # Update json assembly with params in paramsfile in case they changed
        for key, param in self.parsedict.items():
            if key == "assembly_name":
                continue
            data.set_params(key, param)

        # store it.
        self.data = data
Beispiel #8
0
def update_assembly(data):
    """
    Bring a loaded Assembly's parameters in line with the current version.

    A fresh Assembly supplies the current parameter set. The _hackersonly
    dict of `data` is replaced wholesale by the current defaults. Params
    that exist only in `data` are reported and dropped, params that exist
    only in the current version are reported and added with their default
    values, and all shared params keep the values from `data`.

    The function prints a report, pauses for 5 seconds so the user can
    read it, saves the Assembly and returns it.

    Parameters
    ----------
    data : the loaded Assembly to update (modified in place).

    Returns
    -------
    The same Assembly object after data.save() has been called.
    """

    print("##############################################################")
    print("Updating assembly to current version")
    ## New assembly object to update from.
    new_assembly = ip.Assembly("update", quiet=True)

    ## Hackersonly dict gets automatically overwritten
    ## Always use the current version for params in this dict.
    data._hackersonly = deepcopy(new_assembly._hackersonly)

    new_params = set(new_assembly.paramsdict.keys())
    my_params = set(data.paramsdict.keys())

    ## Params in the loaded assembly that no longer exist.
    removed_params = my_params.difference(new_params)
    for i in removed_params:
        print("Removing parameter: {} = {}".format(i, data.paramsdict[i]))

    ## Params that are new in the current version.
    added_params = new_params.difference(my_params)
    for i in added_params:
        print("Adding parameter: {} = {}".format(i,
                                                 new_assembly.paramsdict[i]))

    print("\nPlease take note of these changes. Every effort is made to\n"\
            +"ensure compatibility across versions of ipyrad. See online\n"\
            +"documentation for further details about new parameters.")
    time.sleep(5)
    print("##############################################################")

    ## Rebuild the paramsdict from the current defaults, overlaid with the
    ## loaded values of every param that still exists. This must also run
    ## when params were only removed; otherwise a parameter reported above
    ## as removed would silently stay in the Assembly.
    if added_params or removed_params:
        for i in data.paramsdict:
            if i not in removed_params:
                new_assembly.paramsdict[i] = data.paramsdict[i]
        data.paramsdict = deepcopy(new_assembly.paramsdict)

    data.save()
    return data
Beispiel #9
0
def parse_params(args):
    """
    Read the params file named by args.params into a dict.

    The first line is a header whose first token must be 7 characters
    long. Every following non-blank line contributes one value (the text
    before any "##" comment, stripped). Values are paired positionally
    with the paramsdict keys of a default Assembly.

    Returns
    -------
    dict mapping str(parameter key) to the raw string value.

    Raises
    ------
    SystemExit
        If the params file cannot be opened.
    IPyradWarningExit
        If the header does not match, or the file is empty or starts with
        a blank line.
    """
    ## read the whole params file
    try:
        with open(args.params) as paramsin:
            plines = paramsin.readlines()
    except IOError:
        sys.exit("  No params file found")

    ## check header: big version changes can be distinguished by the header.
    ## An empty file or a blank first line has no first token at all.
    try:
        header_token = plines[0].split()[0]
    except IndexError:
        raise IPyradWarningExit("""
        Error: Params file should not have any empty lines at the top
        of the file. Verify there are no blank lines and rerun ipyrad.
        Offending file - {}
        """.format(args.params))

    if len(header_token) != 7:
        raise IPyradWarningExit("""
        Error: file '{}' is not compatible with ipyrad v.{}.
        Please create and update a new params file using the -n argument. 
        For info on which parameters have changed see the changelog:
        (http://ipyrad.readthedocs.io/releasenotes.html)
        """.format(args.params, ip.__version__))

    ## collect one value per non-blank line; this ignores all blank lines,
    ## not only trailing ones.
    items = []
    for line in plines[1:]:
        if line.strip() == "":
            continue
        items.append(line.split("##")[0].strip())

    ## keys come, in order, from a default Assembly
    keys = ip.Assembly('null', quiet=True).paramsdict.keys()
    return {str(key): value for key, value in zip(keys, items)}
Beispiel #10
0
    def _flagnew(self):
        """
        Write a default params file named after the value of args.new.

        A temporary Assembly is created only to call its write_params
        method; the file is named params-<name>.txt and a confirmation
        with the current directory is printed afterwards.
        """
        name = self.args.new

        # tmp assembly that knows how to write a default params file
        template = ip.Assembly(
            name, quiet=True, cli=True, force=self.args.force)

        # write the new params file
        outfile = "params-{}.txt".format(name)
        template.write_params(outfile, force=self.args.force)

        # print log to screen
        here = os.path.realpath(os.path.curdir)
        print("\n  New file 'params-{}.txt' created in {}\n".format(
            name, here))
Beispiel #11
0
    def parse_params(self):
        """
        Read the params file into self.parsedict.

        The first line of the file is skipped. Each remaining non-blank
        line contributes the text before any "##" comment. Values are
        paired positionally with the parameter keys of a default Assembly
        (leading character of each key removed).

        Raises IPyradError if args.params is unset or the file is missing.
        """
        path = self.args.params

        # an unset path and a missing file are reported identically
        if not path or not os.path.exists(path):
            raise IPyradError("\n  No params file found\n")

        with open(path) as paramsin:
            lines = paramsin.readlines()

        # get values from the params file lines
        vals = []
        for line in lines[1:]:
            if line.strip():
                vals.append(line.split("##")[0].strip())

        # get keys in order from a tmp assembly
        template = ip.Assembly('null', quiet=True)
        keys = [key[1:] for key in template.params._keys]

        # store as a dict
        self.parsedict = {str(key): val for (key, val) in zip(keys, vals)}
Beispiel #12
0
def main():
    """
    Command-line entry point for ipyrad.

    Parses the command line and dispatches to the requested action:
    writing a new params file (-n), merging assemblies, downloading SRA
    data, branching an assembly, running assembly steps and/or showing
    results. A run header and the arguments are appended to the debug log
    before any action is taken.

    Several paths end the process with sys.exit(): exit code 2 after
    writing a new params file or after a usage error, exit code 1 after a
    merge or a download.

    Raises
    ------
    IPyradWarningExit
        If --MPI is requested without a -c (cores) argument.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## Check for a new version on anaconda
    _check_version()

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## Turn the debug output written to ipyrad_log.txt up to 11!
    ## Clean up the old one first, it's cleaner to do this here than
    ## at the end (exceptions, etc)
    if os.path.exists(ip.__debugflag__):
        os.remove(ip.__debugflag__)

    if args.debug:
        print("\n  ** Enabling debug mode ** ")
        ip._debug_on()
        atexit.register(ip._debug_off)

    ## create new paramsfile if -n
    if args.new:
        ## Create a tmp assembly, call write_params to make default params.txt
        try:
            tmpassembly = ip.Assembly(args.new, quiet=True, cli=True)
            tmpassembly.write_params("params-{}.txt".format(args.new),
                                     force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        print("\n  New file 'params-{}.txt' created in {}\n".\
               format(args.new, os.path.realpath(os.path.curdir)))
        sys.exit(2)


    ## if params then must provide action argument with it
    if args.params:
        if not any([args.branch, args.results, args.steps]):
            print("""
    Must provide action argument along with -p argument for params file. 
    e.g., ipyrad -p params-test.txt -r              ## shows results
    e.g., ipyrad -p params-test.txt -s 12           ## runs steps 1 & 2
    e.g., ipyrad -p params-test.txt -b newbranch    ## branch this assembly
    """)
            sys.exit(2)

    ## the reverse usage error: an action was requested without a params
    ## file. Exit like the branch above instead of falling through to the
    ## logging code with nothing to do.
    if not args.params:
        if any([args.branch, args.results, args.steps]):
            print("""
    Must provide params file for branching, doing steps, or getting results.
    e.g., ipyrad -p params-test.txt -r              ## shows results
    e.g., ipyrad -p params-test.txt -s 12           ## runs steps 1 & 2
    e.g., ipyrad -p params-test.txt -b newbranch    ## branch this assembly
    """)
            sys.exit(2)

    ## if branching, or merging do not allow steps in same command
    ## print spacer
    if any([args.branch, args.merge]):
        args.steps = ""
        print("")

    ## always print the header when doing steps
    header = \
    "\n -------------------------------------------------------------"+\
    "\n  ipyrad [v.{}]".format(ip.__version__)+\
    "\n  Interactive assembly and analysis of RAD-seq data"+\
    "\n -------------------------------------------------------------"

    ## Log the current version. End run around the LOGGER
    ## so it'll always print regardless of log level.
    with open(ip.__debugfile__, 'a') as logfile:
        logfile.write(header)
        logfile.write("\n  Begin run: {}".format(time.strftime("%Y-%m-%d %H:%M")))
        logfile.write("\n  Using args {}".format(vars(args)))
        logfile.write("\n  Platform info: {}".format(os.uname()))

    ## if merging just do the merge and exit
    if args.merge:
        print(header)
        merge_assemblies(args)
        sys.exit(1)

    ## if download data do it and then exit. Runs single core in CLI.
    if args.download:
        ## an optional second value names the output directory
        if len(args.download) == 1:
            downloaddir = "sra-fastqs"
        else:
            downloaddir = args.download[1]
        sratools_download(args.download[0], workdir=downloaddir, force=args.force)
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            ## print header
            print(header)

            ## Only blank the log file if we're actually going to run a new
            ## assembly. This used to be in __init__, but had the side effect
            ## of occasionally blanking the log file in an undesirable fashion
            ## for instance if you run a long assembly and it crashes and
            ## then you run `-r` and it blanks the log, it's crazymaking.
            if os.path.exists(ip.__debugfile__):
                if os.path.getsize(ip.__debugfile__) > 50000000:
                    with open(ip.__debugfile__, 'w') as clear:
                        clear.write("file reset")

            ## run Assembly steps
            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## set CLI ipcluster terms
            data._ipcluster["threads"] = args.threads

            ## if ipyclient is running (and matched profile) then use that one
            if args.ipcluster:
                ipyclient = ipp.Client(profile=args.ipcluster)
                data._ipcluster["cores"] = len(ipyclient)

            ## if not then we need to register and launch an ipcluster instance
            else:
                ## set CLI ipcluster terms
                ipyclient = None
                data._ipcluster["cores"] = args.cores if args.cores else detect_cpus()
                data._ipcluster["engines"] = "Local"
                if args.MPI:
                    data._ipcluster["engines"] = "MPI"
                    if not args.cores:
                        raise IPyradWarningExit("must provide -c argument with --MPI")
                ## register to have a cluster-id with "ip- name"
                data = register_ipcluster(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps
            steps = list(args.steps)
            data.run(
                steps=steps,
                force=args.force,
                preview=args.preview,
                show_cluster=1,
                ipyclient=ipyclient)

        if args.results:
            showstats(parsedict)
Beispiel #13
0
        make_stats(data, raws)

    finally:
        ## cleans up chunk files and stats pickles
        tmpfiles = glob.glob(os.path.join(data.dirs.fastqs, "chunk*"))
        tmpfiles += glob.glob(os.path.join(data.dirs.fastqs, "tmp_*.gz"))
        tmpfiles += glob.glob(os.path.join(data.dirs.fastqs, "*.pickle"))
        if tmpfiles:
            for tmpfile in tmpfiles:
                os.remove(tmpfile)


if __name__ == "__main__":

    ## manual smoke test: build a small Assembly and run step 1 on the
    ## simulated RAD test data.
    import ipyrad as ip

    ## directory of this file, and the directory two levels above it
    PATH = os.path.abspath(os.path.dirname(__file__))
    IPATH = os.path.dirname(os.path.dirname(PATH))

    ## location of the test files (only computed, not used below)
    DATA = os.path.join(IPATH, "tests", "test_rad")

    TEST = ip.Assembly("test-demultiplex")
    for _number, _value in (
            (1, "./"),
            (2, "./tests/data/sim_rad_test_R1_.fastq.gz"),
            (3, "./tests/data/sim_rad_test_barcodes.txt"),
        ):
        TEST.set_params(_number, _value)
    TEST.step1()
Beispiel #14
0
def load_json(path, quiet=False):
    """
    Load a JSON-serialized Assembly and rebuild it in the current format.

    A new, empty Assembly is created under the saved name and then filled
    from the JSON: params (via set_params), the _hackersonly dict, every
    remaining attribute that both old and new Assembly have, optional svd
    results, and finally the Samples with their stats, stats_dfs and
    files. Keys that exist only in the saved Assembly or Samples are
    reported through LOGGER.warning and otherwise dropped.

    Parameters
    ----------
    path : str
        Path of the saved Assembly. Both `path + ".json"` and `path`
        itself are tried.
    quiet : bool
        If False, the Assembly name and the path it was saved under are
        printed.

    Returns
    -------
    The newly built Assembly.

    Raises
    ------
    IPyradWarningExit
        If neither candidate file could be opened, or the saved Assembly
        contains no Samples.

    Notes
    -----
    The code uses `dict.iteritems()` and indexes the result of
    `dict.keys()`, both of which only work on Python 2.
    """

    ## load the JSON string and try with name+.json
    ## There is no break after a successful read: when both candidates
    ## can be opened, the later one (`path` itself) is the one kept.
    checkfor = [path+".json", path]
    for inpath in checkfor:
        inpath = inpath.replace("~", os.path.expanduser("~"))
        try:
            with open(inpath, 'r') as infile:
                ## uses _tup_and_byte to ensure ascii and tuples are correct
                fullj = json.loads(infile.read(), object_hook=_tup_and_byte)
        except IOError:
            pass

    ## create a new empty Assembly
    ## UnboundLocalError is how "no file could be opened" shows up here:
    ## fullj was never assigned in the loop above.
    try:
        oldname = fullj["assembly"].pop("name")
        olddir = fullj["assembly"]["dirs"]["project"]
        oldpath = os.path.join(olddir, os.path.splitext(oldname)[0]+".json")
        null = ip.Assembly(oldname, quiet=True)

    except (UnboundLocalError, AttributeError) as inst:
        ## inpath is the last candidate tried, i.e. `path` without ".json"
        raise IPyradWarningExit("""
    Could not find saved Assembly file (.json) in expected location.
    Checks in: [project_dir]/[assembly_name].json
    Checked: {}
    """.format(inpath))

    ## print msg with shortpath
    if not quiet:
        oldpath = oldpath.replace(os.path.expanduser("~"), "~")
        print("  loading Assembly: {}".format(oldname))
        print("  from saved path: {}".format(oldpath))

    ## First get the samples. Create empty sample dict of correct length
    ## (the placeholder values are replaced by Sample objects further down)
    samplekeys = fullj["assembly"].pop("samples")
    null.samples = {name: "" for name in samplekeys}

    ## Next get paramsdict and use set_params to convert values back to
    ## the correct dtypes. Allow set_params to fail because the object will
    ## be subsequently updated by the params from the params file, which may
    ## correct any errors/incompatibilities in the old params file
    oldparams = fullj["assembly"].pop("paramsdict")
    for param, val in oldparams.iteritems():
        ## a fix for backward compatibility with deprecated options
        if param not in ["assembly_name", "excludes", "outgroups"]:
            try:
                null.set_params(param, val)
            except IPyradWarningExit as inst:
                #null.set_params(param, "")
                LOGGER.warning(""" 
    Load assembly error setting params. Not critical b/c new params file may
    correct the problem. Recorded here for debugging:
    {}
    """.format(inst))

    ## Import the hackersonly dict. In this case we don't have the nice
    ## set_params so we're shooting from the hip to reset the values
    try:
        oldhackersonly = fullj["assembly"].pop("_hackersonly")
        for param, val in oldhackersonly.iteritems():
            ## both branches store the value unchanged (None stays None)
            if val == None:
                null._hackersonly[param] = None
            else:
                null._hackersonly[param] = val

    except Exception as inst:
        LOGGER.warning("""
    Load assembly error resetting hackersonly dict element. We will just use
    the default value in the current assembly.""")
    #Here was the param that failed: {} - {}
    #The error: {}
    #""".format(param, val, inst))

    ## Check remaining attributes of Assembly and Raise warning if attributes
    ## do not match up between old and new objects
    newkeys = null.__dict__.keys()
    oldkeys = fullj["assembly"].keys()
    ## find shared keys and deprecated keys
    sharedkeys = set(oldkeys).intersection(set(newkeys))
    lostkeys = set(oldkeys).difference(set(newkeys))

    ## raise warning if there are lost/deprecated keys
    if lostkeys:
        LOGGER.warning("""
    load_json found {a} keys that are unique to the older Assembly.
        - assembly [{b}] v.[{c}] has: {d}
        - current assembly is v.[{e}]
        """.format(a=len(lostkeys), 
                   b=oldname,
                   c=fullj["assembly"]["_version"],
                   d=lostkeys,
                   e=null._version))

    ## load in remaining shared Assembly attributes to null
    for key in sharedkeys:
        null.__setattr__(key, fullj["assembly"][key])

    ## load in svd results if they exist
    try:
        if fullj["assembly"]["svd"]:
            null.__setattr__("svd", fullj["assembly"]["svd"])
            null.svd = ObjDict(null.svd)
    except Exception:
        LOGGER.debug("skipping: no svd results present in old assembly")

    ## Now, load in the Sample objects json dicts
    sample_names = fullj["samples"].keys()
    if not sample_names:
        raise IPyradWarningExit("""
    No samples found in saved assembly. If you are just starting a new
    assembly the file probably got saved erroneously, so it's safe to try 
    removing the assembly file (e.g., rm {}.json) and restarting.

    If you fully completed step 1 and you see this message you should probably
    contact the developers.
    """.format(inpath))

    ## The first saved sample serves as the template for which keys the
    ## saved Samples have.
    sample_keys = fullj["samples"][sample_names[0]].keys()
    stats_keys = fullj["samples"][sample_names[0]]["stats"].keys()
    stats_dfs_keys = fullj["samples"][sample_names[0]]["stats_dfs"].keys()
    ind_statkeys = \
        [fullj["samples"][sample_names[0]]["stats_dfs"][i].keys() \
        for i in stats_dfs_keys]
    ind_statkeys = list(itertools.chain(*ind_statkeys))

    ## check against a null sample
    nsamp = ip.Sample()
    newkeys = nsamp.__dict__.keys()
    newstats = nsamp.__dict__["stats"].keys()
    newstatdfs = nsamp.__dict__["stats_dfs"].keys()
    newindstats = [nsamp.__dict__["stats_dfs"][i].keys() for i in newstatdfs]
    ## NOTE(review): `.values` presumes each keys() result is a pandas
    ## Index (i.e. the stats_dfs entries are pandas objects) -- confirm.
    newindstats = list(itertools.chain(*[i.values for i in newindstats]))

    ## different in attributes?
    diffattr = set(sample_keys).difference(newkeys)
    diffstats = set(stats_keys).difference(newstats)
    diffindstats = set(ind_statkeys).difference(newindstats)

    ## Raise warning if any oldstats were lost or deprecated
    alldiffs = diffattr.union(diffstats).union(diffindstats)
    if any(alldiffs):
        LOGGER.warning("""
    load_json found {a} keys that are unique to the older Samples.
        - assembly [{b}] v.[{c}] has: {d}
        - current assembly is v.[{e}]
        """.format(a=len(alldiffs), 
                   b=oldname,
                   c=fullj["assembly"]["_version"],
                   d=alldiffs,
                   e=null._version))

    ## save stats and statsfiles to Samples
    for sample in null.samples:
        ## create a null Sample
        null.samples[sample] = ip.Sample()

        ## save stats
        sdat = fullj["samples"][sample]['stats']
        ## Reorder the keys so they ascend by step, only include
        ## stats that are actually in the sample. newstats is a
        ## list of the new sample stat names, and stats_keys
        ## are the names of the stats from the json file.
        ## (newstats is re-filtered on every iteration; after the first
        ## pass the filter no longer changes it.)
        newstats = [x for x in newstats if x in stats_keys]
        null.samples[sample].stats = pd.Series(sdat).reindex(newstats)

        ## save stats_dfs
        for statskey in stats_dfs_keys:
            null.samples[sample].stats_dfs[statskey] = \
                pd.Series(fullj["samples"][sample]["stats_dfs"][statskey])\
                .reindex(nsamp.__dict__["stats_dfs"][statskey].keys())

        ## save Sample files
        for filehandle in fullj["samples"][sample]["files"].keys():
            null.samples[sample].files[filehandle] = \
                fullj["samples"][sample]["files"][filehandle]


    ## build the Assembly object stats_dfs
    for statskey in stats_dfs_keys:
        indstat = null._build_stat(statskey)
        if not indstat.empty:
            null.stats_dfs[statskey] = indstat

    ## add remaining attributes to null Samples; the four discarded keys
    ## were either filled in explicitly above or are left at the defaults
    ## of a new Sample.
    shared_keys = set(sample_keys).intersection(newkeys)
    shared_keys.discard("stats")
    shared_keys.discard("files")    
    shared_keys.discard("stats_files")
    shared_keys.discard("stats_dfs")

    for sample in null.samples:
        ## set the others
        for key in shared_keys:
            null.samples[sample].__setattr__(key, fullj["samples"][sample][key])

    ## ensure objects are object dicts
    null.dirs = ObjDict(null.dirs)
    null.stats_files = ObjDict(null.stats_files)
    null.stats_dfs = ObjDict(null.stats_dfs)    
    null.populations = ObjDict(null.populations)
    null.outfiles = ObjDict(null.outfiles)

    return null
Beispiel #15
0
def main():
    """
    Command-line entry point for ipyrad.

    Parses the command line and dispatches to the requested action:
    writing a new params file (-n), printing parameter info, branching an
    assembly, running assembly steps and/or showing results.

    Exits with code 2 after writing a new params file or when -p is given
    without an action, and with code 1 after printing parameter info.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## -n: write a default params file from a tmp assembly, then stop
    if args.new:
        try:
            template = ip.Assembly(args.new, quiet=True)
            template.write_params(
                "params-{}.txt".format(args.new), force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        here = os.path.realpath(os.path.curdir)
        print("\n    New file `params-{}.txt` created in {}\n".\
               format(args.new, here))
        sys.exit(2)

    ## a params file alone is not enough, an action must come with it
    if args.params and not any([args.branch, args.results, args.steps]):
        print("""
    Must provide action argument along with -p argument for params file. 
    e.g., ipyrad -p params-test.txt -r      ## shows results
    e.g., ipyrad -p params-test.txt -s 12   ## runs steps 1 & 2
    """)
        sys.exit(2)

    ## if branching or info do not allow steps in same command, print spacer
    if args.branch or args.info:
        args.steps = ""
        print("")

    ## the header is only printed when doing steps
    header = "".join([
        "\n --------------------------------------------------",
        "\n  ipyrad [v.{}]".format(ip.__version__),
        "\n  Interactive assembly and analysis of RADseq data",
        "\n --------------------------------------------------",
    ])

    ## info was requested unless args.info compares equal to False; a
    ## truthy value selects a single parameter, anything else prints all.
    if not args.info == False:
        if args.info:
            ip.paramsinfo(int(args.info))
        else:
            ip.paramsinfo()
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            print(header)

            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## if cores was entered, limit cores to this number; otherwise
            ## the Assembly keeps whatever default it was created with.
            if args.cores:
                data.cpus = args.cores

            data._ipcluster["engines"] = "MPI" if args.MPI else "Local"

            ## launch ipcluster and register for later destruction
            data = ipcontroller_init(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps, one character of args.steps per step
            data.run(
                steps=list(args.steps),
                force=args.force,
                preview=args.preview)

        if args.results:
            showstats(parsedict)
Beispiel #16
0
    ## update sample stats
    fsamplehits.update(samplehits)
    fbarhits.update(barhits)
    fmisses.update(misses)
    fdbars.update(dbars)

    statdicts = perfile, fsamplehits, fbarhits, fmisses, fdbars
    return statdicts


if __name__ == "__main__":

    ## manual smoke test: build a small Assembly on the simulated RAD
    ## data, print its cpus setting, set it to 4 and run step 1.
    import ipyrad as ip

    TEST = ip.Assembly("profile_s1")
    for _number, _value in (
            (1, "./maintest"),
            (2, "./ipsimdata/sim_rad_test_R1_.fastq.gz"),
            (3, "./ipsimdata/sim_rad_test_barcodes.txt"),
        ):
        TEST.set_params(_number, _value)

    print(TEST.cpus)
    TEST.cpus = 4
    TEST.step1()