def __init__(self, config):
    """Check the config, create needed directories, and set up the logger.

    Verifies the script is run from the expected directory, ensures the
    output folder exists, reconciles the ``segments`` setting with the
    number of input paths, checks every input path exists, and loads the
    sequences (only FASTA input is supported).

    Parameters
    ----------
    config : dict
        User-supplied configuration; merged with the "prepare" defaults
        via ``combine_configs``.

    Raises
    ------
    SystemExit
        If not run from within ``config["dir"]`` (exit code 2); the
        logger's ``fatal`` calls are also expected to terminate the run.
    """
    super(prepare, self).__init__()
    self.config = combine_configs("prepare", config)

    # The pipeline relies on relative paths, so enforce the expected
    # working directory before touching anything else.
    if os.path.basename(os.getcwd()) != self.config["dir"]:
        print("Run this script from within the {} directory".format(
            self.config["dir"]))
        sys.exit(2)

    if not os.path.isdir(self.config["output_folder"]):
        os.makedirs(self.config["output_folder"])

    self.log = logger(self.config["output_folder"], False)

    # Reconcile "segments" with the input paths. A value of False means a
    # single, unsegmented genome (exactly one input path); otherwise the
    # segment list must pair 1:1 with the input paths. Explicit checks
    # replace the original asserts, which are stripped under ``python -O``.
    # NOTE(review): ``is False`` matches the original ``== False`` for the
    # documented config values; other falsy values (e.g. empty list) fall
    # through to the length comparison, as before.
    if self.config["segments"] is False:
        if len(self.config["input_paths"]) != 1:
            self.log.fatal("Config file: # segments don't match # input paths")
        self.config["segments"] = ["genome"]
    elif len(self.config["segments"]) != len(self.config["input_paths"]):
        self.log.fatal("Config file: # segments don't match # input paths")

    for p in self.config["input_paths"]:
        if not os.path.exists(p):
            self.log.fatal(
                "Config file: input path '{}' doesn't exist".format(p))

    # this block initialises self.segments
    if self.config["input_format"] == "fasta":
        self.load_fasta()
    else:
        # BUG FIX: the original called .format(self.config["dir"]) on a
        # message with no placeholder — the argument was silently ignored.
        self.log.fatal("Currently only FASTA sequences can be loaded")
def __init__(self, config):
    """Check the config, load the prepared JSON, and set up process state.

    Verifies the working directory, creates the configured output
    directories, parses the JSON produced by the prepare step into its
    components (info, colors, lat/longs, sequences, titers), sets up the
    intermediate file-dump paths, and optionally removes stale
    intermediates for a clean build.

    Parameters
    ----------
    config : dict
        User-supplied configuration; merged with the "process" defaults
        via ``combine_configs``.

    Raises
    ------
    SystemExit
        If not run from within ``config["dir"]`` (exit code 2); the
        logger's ``fatal`` calls are also expected to terminate the run.
    """
    super(process, self).__init__()
    self.config = combine_configs("process", config)

    # The pipeline relies on relative paths, so enforce the expected
    # working directory before touching anything else.
    if os.path.basename(os.getcwd()) != self.config["dir"]:
        print("Run this script from within the {} directory".format(
            self.config["dir"]))
        sys.exit(2)

    for p in self.config["output"].values():
        if not os.path.isdir(p):
            os.makedirs(p)
    self.log = logger(self.config["output"]["data"], False)

    # parse the JSON into different data bits
    try:
        with open(self.config["in"], 'r') as fh:
            data = json.load(fh)
    except Exception as e:
        self.log.fatal("Error loading JSON. Error: {}".format(e))

    self.info = data["info"]
    # time_interval arrives as ISO "YYYY-MM-DD" strings; convert to dates.
    if "time_interval" in data["info"]:
        self.info["time_interval"] = [
            datetime.strptime(x, '%Y-%m-%d').date()
            for x in data["info"]["time_interval"]
        ]
    self.info["lineage"] = data["info"]["lineage"]

    if 'leaves' in data:
        self.tree_leaves = data['leaves']

    # colors / lat_longs are optional; fall back to False with a notice.
    try:
        self.colors = data["colors"]
    except KeyError:
        self.log.notify("* colours have not been set")
        self.colors = False
    try:
        self.lat_longs = data["lat_longs"]
    except KeyError:
        self.log.notify("* latitude & longitudes have not been set")
        self.lat_longs = False

    # backwards compatability - set up file_dumps (need to rewrite sometime)
    self.file_dumps = {}
    self.output_path = os.path.join(self.config["output"]["data"],
                                    self.info["prefix"])
    self.file_dumps['seqs'] = self.output_path + '_sequences.pkl.gz'
    self.file_dumps['tree'] = self.output_path + '_tree.newick'
    self.file_dumps['nodes'] = self.output_path + '_nodes.pkl.gz'

    if self.config["clean"]:
        self.log.notify("Removing intermediate files for a clean build")
        for f in glob.glob(self.output_path + "*"):
            os.remove(f)

    if "reference" in data:
        self.seqs = sequence_set(self.log, data["sequences"],
                                 data["reference"],
                                 self.info["date_format"])
    else:
        # BUG FIX: the original literal contained a raw newline inside the
        # quotes, which is a syntax error; the message is now one line.
        self.log.fatal("No reference provided. Cannot continue.")

    # backward compatability
    self.reference_seq = self.seqs.reference_seq
    self.proteins = self.seqs.proteins

    for trait in self.info["traits_are_dates"]:
        self.seqs.convert_trait_to_numerical_date(trait,
                                                  self.info["date_format"])

    # Prepare titers if they are available.
    if "titers" in data:
        self.log.debug("Loaded %i titer measurements" % len(data["titers"]))
        # Convert titer dictionary indices from JSON-compatible strings back
        # to tuples.
        # BUG FIX: dict.iteritems() is Python 2 only — use items().
        # SECURITY FIX: literal_eval replaces eval() so only literal tuple
        # syntax is accepted from the (externally produced) JSON file.
        from ast import literal_eval
        self.titers = {
            literal_eval(key): value
            for key, value in data["titers"].items()
        }

    ## useful flag to set (from pathogen run file) to disable restoring
    self.try_to_restore = True