Esempio n. 1
0
    def __init__(self, config):
        """Check the config, create the output folder, set up the logger and load input.

        Args:
            config: User-supplied settings, passed to ``combine_configs``
                together with the section name ``"prepare"``; the result is
                stored on ``self.config``.

        Side effects:
            * Prints a message and calls ``sys.exit(2)`` when the name of the
              current working directory differs from ``config["dir"]``.
            * Creates ``config["output_folder"]`` if it is not a directory.
            * Sets ``self.log`` and reports invalid configuration through
              ``self.log.fatal``.
            * Calls ``self.load_fasta()`` when ``config["input_format"]`` is
              ``"fasta"``.

        The checks are written as explicit conditionals rather than ``assert``
        statements so that they still run when Python is started with ``-O``.
        """
        super(prepare, self).__init__()
        self.config = combine_configs("prepare", config)

        # Refuse to run unless the current directory's name matches the config.
        if os.path.basename(os.getcwd()) != self.config["dir"]:
            print("Run this script from within the {} directory".format(
                self.config["dir"]))
            sys.exit(2)

        output_folder = self.config["output_folder"]
        if not os.path.isdir(output_folder):
            os.makedirs(output_folder)

        self.log = logger(output_folder, False)

        # NOTE(review): the code below assumes self.log.fatal() terminates the
        # program; if it returns, execution simply continues with the next
        # check, exactly as it did before.

        # Either no segments are configured (then exactly one input path is
        # expected and a single "genome" segment is used), or there must be
        # one input path per segment.
        mismatch = False
        if self.config["segments"] == False:  # noqa: E712 - keep loose equality
            if len(self.config["input_paths"]) == 1:
                self.config["segments"] = ["genome"]
            else:
                mismatch = True
        elif len(self.config["segments"]) != len(self.config["input_paths"]):
            mismatch = True
        if mismatch:
            self.log.fatal("Config file: # segments don't match # input paths")

        # Report the first input path that does not exist.
        for path in self.config["input_paths"]:
            if not os.path.exists(path):
                self.log.fatal(
                    "Config file: input path '{}' doesn't exist".format(path))
                break

        # Load the sequences; per the original comment this is the step that
        # initialises self.segments.
        if self.config["input_format"] == "fasta":
            self.load_fasta()
        else:
            self.log.fatal("Currently only FASTA sequences can be loaded")
Esempio n. 2
0
    def __init__(self, config):
        """Check the config, create output directories, set up the logger and load the input JSON.

        Args:
            config: User-supplied settings, passed to ``combine_configs``
                together with the section name ``"process"``; the result is
                stored on ``self.config``.

        Side effects:
            * Prints a message and calls ``sys.exit(2)`` when the name of the
              current working directory differs from ``config["dir"]``.
            * Creates every directory listed in ``config["output"]``.
            * Reads the JSON file at ``config["in"]`` and populates
              ``self.info``, ``self.colors``, ``self.lat_longs``,
              ``self.file_dumps``, ``self.output_path``, ``self.seqs``,
              ``self.reference_seq``, ``self.proteins`` and, when present in
              the JSON, ``self.tree_leaves`` and ``self.titers``.
            * When ``config["clean"]`` is true, deletes every file whose path
              starts with ``self.output_path``.

        Raises:
            KeyError: if a required key (e.g. ``"info"``, ``"lineage"``,
                ``"prefix"``, ``"sequences"``) is missing from the config or
                the JSON.
        """
        super(process, self).__init__()
        self.config = combine_configs("process", config)

        # Explicit check instead of `assert`, which is stripped under -O.
        if os.path.basename(os.getcwd()) != self.config["dir"]:
            print("Run this script from within the {} directory".format(
                self.config["dir"]))
            sys.exit(2)

        for out_dir in self.config["output"].values():
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

        self.log = logger(self.config["output"]["data"], False)

        # Parse the JSON into different data bits.
        # NOTE(review): assumes self.log.fatal() terminates the program;
        # otherwise `data` is unbound on the lines that follow.
        try:
            with open(self.config["in"], 'r') as fh:
                data = json.load(fh)
        except Exception as e:
            self.log.fatal("Error loading JSON. Error: {}".format(e))

        self.info = data["info"]
        if "time_interval" in self.info:
            # Dates are stored in the JSON as 'YYYY-MM-DD' strings.
            self.info["time_interval"] = [
                datetime.strptime(day, '%Y-%m-%d').date()
                for day in self.info["time_interval"]
            ]
        # self.info is the same object as data["info"], so this assignment is
        # effectively a check that the "lineage" key exists (KeyError if not).
        self.info["lineage"] = data["info"]["lineage"]

        if 'leaves' in data:
            self.tree_leaves = data['leaves']

        # Optional sections: fall back to False and tell the user.
        try:
            self.colors = data["colors"]
        except KeyError:
            self.log.notify("* colours have not been set")
            self.colors = False
        try:
            self.lat_longs = data["lat_longs"]
        except KeyError:
            self.log.notify("* latitude & longitudes have not been set")
            self.lat_longs = False

        # Backwards compatibility - set up file_dumps (needs a rewrite sometime).
        self.file_dumps = {}
        self.output_path = os.path.join(self.config["output"]["data"],
                                        self.info["prefix"])
        self.file_dumps['seqs'] = self.output_path + '_sequences.pkl.gz'
        self.file_dumps['tree'] = self.output_path + '_tree.newick'
        self.file_dumps['nodes'] = self.output_path + '_nodes.pkl.gz'

        if self.config["clean"] == True:  # noqa: E712 - keep loose equality
            self.log.notify("Removing intermediate files for a clean build")
            for stale in glob.glob(self.output_path + "*"):
                os.remove(stale)

        if "reference" in data:
            self.seqs = sequence_set(self.log, data["sequences"],
                                     data["reference"],
                                     self.info["date_format"])
        else:
            # NOTE(review): if fatal() ever returns, self.seqs is unset and
            # the attribute accesses below fail.
            self.log.fatal("No reference provided. Cannot continue.")
        # Backward compatibility aliases.
        self.reference_seq = self.seqs.reference_seq
        self.proteins = self.seqs.proteins

        for trait in self.info["traits_are_dates"]:
            self.seqs.convert_trait_to_numerical_date(trait,
                                                      self.info["date_format"])

        # Prepare titers if they are available.
        if "titers" in data:
            self.log.debug("Loaded %i titer measurements" %
                           len(data["titers"]))
            # Convert titer dictionary indices from JSON-compatible strings
            # back to tuples.
            # NOTE(review): eval() executes arbitrary code taken from the input
            # file. If the keys are always plain tuple/str literals,
            # ast.literal_eval would be a safe replacement - confirm the key
            # format before switching.
            # .items() (rather than the Python-2-only .iteritems()) works on
            # both Python 2 and Python 3.
            self.titers = {
                eval(key): value
                for key, value in data["titers"].items()
            }

        # Useful flag to set (from pathogen run file) to disable restoring.
        self.try_to_restore = True