def __init__(self, config_file, values=None, logged=False):
    """
    set up the configuration, either from a file or from a dictionary

    if a configuration file is given it is parsed with _read_file;
    otherwise the dictionary passed as values becomes the settings and its
    "taxonomy_ranks" entry is joined with commas into tax_string
    if neither is given, a critical message is logged and the program
    exits with status 1
    afterwards the settings are validated with _check_values

    config_file: path of the configuration file, or None
    values: dictionary with the settings (only used if config_file is None)
    logged: True if the caller has already set up logging
    """
    if not logged:
        # nobody configured logging yet: this will write a file to cwd
        logmethods.initialise("config.log")
    self.log = logging.getLogger("config")

    # defaults, filled in later from the file / the dictionary
    self.settings = {}
    script_path = os.path.realpath(sys.argv[0])
    self.config_script_dir = os.path.dirname(script_path)
    self.tax_string = ""
    self.ncbi_tax_db = None
    self.genomes_exclude = []

    if config_file is not None:
        self._read_file(config_file)
    elif values is not None:
        self.settings = values
        self.tax_string = ",".join(values["taxonomy_ranks"])
    else:
        self.log.critical("Please provide either values or a configuration file.")
        sys.exit(1)

    self._check_values()
def __init__(self, db, logged=False):
    """
    open the taxonomy database and start with an empty tree state

    db: path to an existing database file, passed on to
        taxonomy_ncbi.TaxonomyNcbi
    logged: True if the caller has already set up logging

    raises AssertionError if db is not an existing file
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped when Python runs with -O, which would let a wrong path reach
    # the database layer. The exception type is kept for existing callers.
    if not os.path.isfile(db):
        raise AssertionError("taxonomy database is not a file: {}".format(db))
    self.tax = taxonomy_ncbi.TaxonomyNcbi(db)
    self.tree = None
    self.leaves = None
    self.exc = None
    if not logged:
        # this will write a file to cwd
        logmethods.initialise('Ncbi2Newick.log')
    self.log = logging.getLogger('ncbi2newick')
def __init__(self, logged=False):
    """
    create an empty status: not verbose, no variables, no files

    logged: True if the caller has already set up logging
    """
    self.verbose = False
    self.variables = dict()
    # path -> Filestatus
    self.files = dict()
    # only needed when a status is loaded from backup
    self.failed_files = dict()

    if not logged:
        # nobody configured logging yet: this will write a file to cwd
        logmethods.initialise('status.log')
    self.log = logging.getLogger('status')
def __init__(self, cfg, default_ans=False, backup=False, logged=False):
    """
    initialise the training state with empty mappings and default options

    cfg: object containing all settings, stored as self.config
    default_ans: if True, every question is answered with yes
    backup: stored as self.backup
    logged: True if the caller has already set up logging
    """
    script_path = os.path.realpath(sys.argv[0])
    self.workingdir = os.path.dirname(script_path)

    # ---- mappings ----
    # node --> list of organisms which were mapped to that node
    self.tree_organism_map = dict()
    # organism --> closest node on the tree
    self.organism_tree_map = dict()
    # organism --> list of files including sequences
    self.organism_file_map = dict()
    # all organisms available
    self.organisms = set()
    # selected organisms = nodes on the taxonomic tree
    self.nodes = list()
    # genomes that have not been considered during computation
    self.genomes_excluded = list()
    # organisms with a lack of mapping // or that have been mapped to the root
    self.organisms_invalid = set()
    # SQLite database including the NCBI taxonomy
    self.sqlite_taxonomy = None
    # contains all settings
    self.config = cfg

    # ---- advanced training options (learning) ----
    # 0: 0/1 loss, 1: path loss
    self.loss_function = 1
    # 0: no standardization, 1: z standardization
    self.z_standardization = 1
    # 0: no misc nodes, 1: add misc nodes
    self.misc_nodes = 1
    # number of fragments per node
    self.n_frags_per_node = 0.0

    # say yes to every question?
    self.yes = default_ans
    self.tree_file = ""
    self.backup = backup
    self.stat = None
    self.backupdir = None

    if not logged:
        # nobody configured logging yet: this will write a file to cwd
        logmethods.initialise('train.log')
    self.log = logging.getLogger('train')
    # better root ?
taxdb=self.config.ncbi_tax_db) if self.yes: tax_call = "{} -y".format(tax_call) my_log.debug(tax_call) success = os.system(tax_call) if success != 0: my_log.critical("Error in creating the SQLite database.") sys.exit(1) my_log.debug("The NCBI database did not exist - created one: {}".format(self.config.ncbi_tax_db)) self.sqlite_taxonomy = taxonomy_ncbi.TaxonomyNcbi(self.config.ncbi_tax_db) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-c", help="configuration file", action='store', required=True) parser.add_argument('-y', help="automatically set answers to 'yes'", action='store_true', default=False) parser.add_argument("-fb", help="backup directory from a previous PPS run, " "will try to restart from within the pipeline", action='store', default=None) parser.add_argument('-b', help="write a backup to the project directory", action='store_true', default=False) args = parser.parse_args() logmethods.initialise('train.log') cfg = config.Config(args.c, logged=True) obj = Train(cfg, args.y, args.b, logged=True) if args.fb is None: obj.main_processing() else: obj.from_backup(args.fb)