Esempio n. 1
0
    def __init__(self, name, **kwargs):
        """Set up a new assembly object with default settings.

        Parameters
        ----------
        name:
            Assembly name. It is passed to ``check_name`` before being stored.
        **kwargs:
            Optional settings. A truthy ``cli`` marks the object as created
            through the CLI, a truthy ``quiet`` suppresses the creation
            message, and any key matching an ipcluster setting replaces the
            corresponding default.
        """
        # "cli" is included in kwargs when the CLI is being used
        self._cli = bool(kwargs.get("cli"))

        # starts out False; only .run() updates it
        self.quiet = False

        # the name must not contain special characters
        check_name(name)
        self.name = name
        if not (kwargs.get("quiet") or self._cli):
            self._print("New Assembly: {}".format(self.name))

        # default ipcluster launch info
        self.ipcluster = dict(
            cluster_id="",
            profile="default",
            engines="Local",
            quiet=0,
            timeout=120,
            cores=0,  # detect_cpus(),
            threads=2,
            pids={},
        )
        # kwargs whose key names an ipcluster setting override its default
        self.ipcluster.update({
            option: value
            for option, value in kwargs.items()
            if option in self.ipcluster
        })

        # stats_files maps to file locations, stats_dfs to pandas dataframes
        self.stats_files = ObjDict({})
        self.stats_dfs = ObjDict({})

        # linked samples {sample-name: sample-object}
        self.samples = {}

        # populations {popname: poplist}
        self.populations = {}

        # linked multiplex (barcode) files
        self.barcodes = {}

        # locations of output files
        self.outfiles = ObjDict()
        self.outfiles.loci = ""

        # supercatg file storage
        self.clust_database = self.database = ""

        # default params dict
        self.params = Params(self)
        self.hackersonly = Hackers()

        # data directories for this Assembly; only the project dir is set
        # up front, every other entry starts as an empty string
        dir_map = {"project": os.path.realpath(self.params.project_dir)}
        for step_dir in (
            "fastqs",
            "edits",
            "clusts",
            "consens",
            "across",
            "outfiles",
        ):
            dir_map[step_dir] = ""
        self.dirs = ObjDict(dir_map)
Esempio n. 2
0
    def __init__(self, name=""):
        """Initialize the object with a name, empty file links and blank stats.

        Parameters
        ----------
        name:
            Name stored on the instance; defaults to the empty string.
        """

        def _blank(labels):
            # object-dtype Series indexed by ``labels`` with no data supplied
            return pd.Series(index=labels, dtype=object)

        self.name = name
        self.barcode = ""

        # links to files: one independent empty list per category
        file_kinds = (
            "fastqs",
            "edits",
            "mapped_reads",
            "unmapped_reads",
            "clusters",
            "consens",
            "database",
        )
        self.files = ObjDict({kind: [] for kind in file_kinds})

        # summary stats
        self.stats = _blank([
            "state",
            "reads_raw",
            "reads_passed_filter",
            "reads_merged",
            "refseq_mapped_reads",
            "refseq_unmapped_reads",
            "clusters_total",
            "clusters_hidepth",
            "hetero_est",
            "error_est",
            "reads_consens",
        ])

        # per-step stats, keyed "s1" .. "s5"
        step1_labels = ["reads_raw"]
        # NOTE: "filtered_by_qscore" and "filtered_by_adapter" were commented
        # out of the s2 labels in the original and remain excluded here.
        step2_labels = [
            "reads_raw",
            "trim_adapter_bp_read1",
            "trim_adapter_bp_read2",
            "trim_quality_bp_read1",
            "trim_quality_bp_read2",
            "reads_filtered_by_Ns",
            "reads_filtered_by_minlen",
            "reads_passed_filter",
        ]
        step3_labels = [
            "merged_pairs",
            "clusters_total",
            "hidepth_min",
            "clusters_hidepth",
            "avg_depth_total",
            "avg_depth_mj",
            "avg_depth_stat",
            "sd_depth_total",
            "sd_depth_mj",
            "sd_depth_stat",
            "filtered_bad_align",
        ]
        step4_labels = ["hetero_est", "error_est"]
        step5_labels = [
            "clusters_total",
            "filtered_by_depth",
            "filtered_by_maxH",
            "filtered_by_maxAlleles",
            "filtered_by_maxN",
            "reads_consens",
            "nsites",
            "nhetero",
            "heterozygosity",
        ]
        self.stats_dfs = ObjDict({
            "s1": _blank(step1_labels),
            "s2": _blank(step2_labels),
            "s3": _blank(step3_labels),
            "s4": _blank(step4_labels),
            "s5": _blank(step5_labels),
        })

        # cluster depth information (biggest memory cost)
        self.depths = {}