def write_parameters(self, output_dir, gz=False):
    """
    Write the parameters of every record in ``self.records`` to
    ``<output_dir>/<record name>.json``, one file per record.

    ``output_dir`` is created (including missing parents) when it does not
    exist yet.

    output_dir -- directory that receives the JSON files.
    gz         -- forwarded unchanged as the ``gz`` argument of
                  ``fileIO.fwriter`` (presumably toggles gzip compression
                  of the output -- confirm against fileIO).

    Raises whatever ``os.makedirs`` raises (an ``OSError``) if the directory
    cannot be created; the error text is echoed to stderr first.
    """
    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except EnvironmentError as err:
            # os.makedirs signals failure with OSError. EnvironmentError is
            # the common base of OSError/IOError on Python 2 and an alias of
            # OSError on Python 3, so this catches it on both.
            # str(err) is used because exceptions have no ``.message``
            # attribute on Python 3.
            sys.stderr.write('{}\n'.format(err))
            raise

    for rec in self.records:
        target = os.path.join(output_dir, '{}.json'.format(rec.name))
        # Honour the caller's ``gz`` flag instead of a fixed value.
        with fileIO.fwriter(target, gz=gz) as outfile:
            rec.parameters.write(outfile, indent=4)
def read_alignments(self, input_dir, file_format, header_grep=None, compression=None):
    """
    Load every alignment file of the requested format found in ``input_dir``.

    Files are selected by extension: ``fa``, ``fas`` and ``fasta`` when
    ``file_format`` is 'fasta', ``phy`` when it is 'phylip', and nothing
    for any other value. When ``compression`` is given ('gz' or 'bz2') it
    is appended to each extension (e.g. ``fa.gz``) and every file is
    unpacked into a temporary file before being parsed.

    If ``header_grep`` is supplied it is called on each sequence header and
    the alignment is rebuilt from the transformed headers.

    The sorted file list is kept in ``self._input_files``. Returns the list
    of loaded ``Alignment`` records; each record's ``name`` is the file path
    passed through ``fileIO.strip_extensions``.
    """
    # Validate the compression option up front
    # (presumably raises on an unsupported value -- see optioncheck).
    optioncheck(compression, [None, 'gz', 'bz2'])

    if file_format == 'fasta':
        wanted = ['fa', 'fas', 'fasta']
    elif file_format == 'phylip':
        wanted = ['phy']
    else:
        wanted = []

    if compression:
        wanted = [ext + '.' + compression for ext in wanted]

    files = fileIO.glob_by_extensions(input_dir, wanted)
    files.sort(key=SORT_KEY)
    self._input_files = files

    def load(source):
        # First attempt passes True as the third Alignment argument; on a
        # RuntimeError the construction is retried with False.
        try:
            return Alignment(source, file_format, True)
        except RuntimeError:
            return Alignment(source, file_format, False)

    loaded = []
    progress = setup_progressbar("Loading files", len(files), simple_progress=True)
    progress.start()

    for index, path in enumerate(files):
        if compression is not None:
            # Unpack into a temporary file, then parse that copy while the
            # temporary file is still alive.
            with fileIO.TempFile() as scratch:
                with fileIO.freader(path, compression) as src, fileIO.fwriter(scratch) as dst:
                    for chunk in src:
                        dst.write(chunk)
                aln = load(scratch)
        else:
            aln = load(path)

        if header_grep:
            try:
                kind = 'dna' if aln.is_dna() else 'protein'
                relabelled = [(header_grep(head), seq) for (head, seq) in aln.get_sequences()]
                aln = Alignment(relabelled, kind)
            except TypeError:
                message = ("Couldn't apply header_grep to header\n"
                           "alignment number={}, name={}\n"
                           "header_grep={}")
                raise TypeError(message.format(index, fileIO.strip_extensions(path), header_grep))
            except RuntimeError:
                template = 'RuntimeError occurred processing alignment number={}, name={}'
                print(template.format(index, fileIO.strip_extensions(path)))
                raise

        aln.name = fileIO.strip_extensions(path)
        loaded.append(aln)
        progress.update(index)

    progress.finish()
    return loaded
def read_alignments(self, input_dir, file_format, header_grep=None, compression=None):
    """
    Read all alignments of ``file_format`` from ``input_dir`` and return them.

    Recognised extensions are ``fa``/``fas``/``fasta`` for 'fasta' and
    ``phy`` for 'phylip'; any other format matches no files. With
    ``compression`` set to 'gz' or 'bz2' the compression suffix is added to
    each extension, and each matching file is copied line by line into a
    temporary file that is parsed in its place.

    ``header_grep``, when truthy, is applied to every sequence header and a
    new alignment is built from the results.

    Side effect: the sorted list of matching files is assigned to
    ``self._input_files``. The return value is a list of ``Alignment``
    objects whose ``name`` is ``fileIO.strip_extensions`` of the file path.
    """
    optioncheck(compression, [None, 'gz', 'bz2'])

    # Choose the filename suffixes that identify the requested format.
    if file_format == 'fasta':
        suffixes = ['fa', 'fas', 'fasta']
    elif file_format == 'phylip':
        suffixes = ['phy']
    else:
        suffixes = []

    if compression:
        compressed_suffixes = []
        for suffix in suffixes:
            compressed_suffixes.append('.'.join((suffix, compression)))
        suffixes = compressed_suffixes

    files = fileIO.glob_by_extensions(input_dir, suffixes)
    files.sort(key=SORT_KEY)
    self._input_files = files

    bar = setup_progressbar("Loading files", len(files), simple_progress=True)
    bar.start()

    records = []
    for position, filename in enumerate(files):
        if compression is None:
            # Plain file: parse it directly, retrying with False as the
            # third Alignment argument if the first attempt fails.
            try:
                record = Alignment(filename, file_format, True)
            except RuntimeError:
                record = Alignment(filename, file_format, False)
        else:
            # Compressed file: copy the decoded lines into a temporary file
            # and parse that copy before the temporary file goes away.
            with fileIO.TempFile() as tmpfile:
                with fileIO.freader(filename, compression) as reader:
                    with fileIO.fwriter(tmpfile) as writer:
                        for line in reader:
                            writer.write(line)
                try:
                    record = Alignment(tmpfile, file_format, True)
                except RuntimeError:
                    record = Alignment(tmpfile, file_format, False)

        if header_grep:
            try:
                if record.is_dna():
                    datatype = 'dna'
                else:
                    datatype = 'protein'
                renamed = []
                for header, sequence in record.get_sequences():
                    renamed.append((header_grep(header), sequence))
                record = Alignment(renamed, datatype)
            except TypeError:
                raise TypeError(
                    "Couldn't apply header_grep to header\n"
                    "alignment number={}, name={}\n"
                    "header_grep={}".format(
                        position, fileIO.strip_extensions(filename), header_grep))
            except RuntimeError:
                print('RuntimeError occurred processing alignment number={}, name={}'
                      .format(position, fileIO.strip_extensions(filename)))
                raise

        record.name = fileIO.strip_extensions(filename)
        records.append(record)
        bar.update(position)

    bar.finish()
    return records