def write_sequence(args):
    """Write the requested regions of ``args.fasta`` as text.

    Regions come from ``split_regions(args)``; when it returns none, every
    key of the FASTA file is written. Each region goes to its own file when
    ``args.split_files`` is set, otherwise to ``sys.stdout``.

    Args:
        args: Parsed command-line namespace. Attributes read directly here:
            ``fasta``, ``default_seq``, ``lazy``, ``delimiter`` and
            ``split_files``. The whole namespace is also handed to
            ``split_regions`` and ``fetch_sequence``.

    Raises:
        FetchError: Re-raised with a hint to try ``--lazy`` when fetching a
            region fails.
    """
    _, ext = os.path.splitext(args.fasta)
    if ext:
        ext = ext[1:]  # remove the dot from extension

    fasta = Fasta(args.fasta,
                  default_seq=args.default_seq,
                  strict_bounds=not args.lazy,
                  split_char=args.delimiter)
    try:
        regions_to_fetch, split_function = split_regions(args)
        if not regions_to_fetch:
            regions_to_fetch = tuple(fasta.keys())

        for region in regions_to_fetch:
            name, start, end = split_function(region)
            if args.split_files:
                # One output file per region, named from the non-empty parts
                # and restricted to alphanumerics plus `keepcharacters`.
                filename = '.'.join(
                    str(part) for part in (name, start, end, ext) if part)
                filename = ''.join(
                    c for c in filename if c.isalnum() or c in keepcharacters)
                outfile = open(filename, 'w')
            else:
                outfile = sys.stdout
            try:
                for line in fetch_sequence(args, fasta, name, start, end):
                    outfile.write(line)
            except FetchError as e:
                # rstrip() drops any trailing whitespace, so the separating
                # space has to be supplied explicitly before the hint.
                raise FetchError(e.msg.rstrip() + " Try setting --lazy.\n")
            finally:
                # Close the per-region file even when the fetch fails;
                # sys.stdout is never closed here.
                if args.split_files:
                    outfile.close()
    finally:
        # Release the FASTA handle on both the success and the error path.
        fasta.__exit__()
def write_sequence(args):
    """Write the requested regions of ``args.fasta``, optionally transformed.

    Sequence names are filtered with ``args.regex`` (the match is inverted
    when ``args.invert_match`` is set). Regions come from
    ``split_regions(args)``; when it returns none, every key of the FASTA
    file is processed. Regions whose length falls outside
    ``args.size_range`` (inclusive bounds) are skipped.

    Output goes to a per-region file when ``args.split_files`` is set, else
    to ``args.out`` when given, else to ``sys.stdout``. With
    ``args.transform`` set, the result of ``transform_sequence`` is written
    instead of the lines from ``fetch_sequence``.

    Args:
        args: Parsed command-line namespace. Attributes read directly here:
            ``fasta``, ``regex``, ``invert_match``, ``default_seq``,
            ``header_function``, ``lazy``, ``delimiter``, ``long_names``,
            ``no_rebuild``, ``size_range``, ``split_files``, ``out`` and
            ``transform``.

    Raises:
        FetchError: Re-raised with a hint to try ``--lazy`` when fetching a
            region fails.
    """
    _, ext = os.path.splitext(args.fasta)
    if ext:
        ext = ext[1:]  # remove the dot from extension

    # Compile the pattern once; the inverted filter reuses the same compiled
    # search instead of recompiling on every header.
    regex_search = re.compile(args.regex).search
    if args.invert_match:
        def filt_function(x):
            return not regex_search(x)
    else:
        filt_function = regex_search

    # NOTE(security): eval() executes args.header_function as Python code.
    # This is only safe while that string is supplied by a trusted user.
    fasta = Fasta(args.fasta,
                  default_seq=args.default_seq,
                  key_function=eval(args.header_function),
                  strict_bounds=not args.lazy,
                  split_char=args.delimiter,
                  filt_function=filt_function,
                  read_long_names=args.long_names,
                  rebuild=not args.no_rebuild)
    try:
        regions_to_fetch, split_function = split_regions(args)
        if not regions_to_fetch:
            regions_to_fetch = fasta.keys()

        # The nucleotide table header is emitted once per call, to whichever
        # output is current when the first region is transformed.
        header = False
        for region in regions_to_fetch:
            name, start, end = split_function(region)

            if args.size_range:
                if start is not None and end is not None:
                    sequence_len = end - start
                else:
                    sequence_len = len(fasta[name])
                if not (args.size_range[0] <= sequence_len <= args.size_range[1]):
                    continue

            if args.split_files:
                # One output file per region, named from the non-empty parts
                # and restricted to alphanumerics plus `keepcharacters`.
                filename = '.'.join(
                    str(part) for part in (name, start, end, ext) if part)
                filename = ''.join(
                    c for c in filename if c.isalnum() or c in keepcharacters)
                outfile = smart_open(filename, 'w', s3_upload=s3_config)
            elif args.out:
                outfile = args.out
            else:
                outfile = sys.stdout

            try:
                if args.transform:
                    if not header and args.transform == 'nucleotide':
                        outfile.write("name\tstart\tend\tA\tT\tC\tG\tN\tothers\n")
                        header = True
                    outfile.write(
                        transform_sequence(args, fasta, name, start, end))
                else:
                    for line in fetch_sequence(args, fasta, name, start, end):
                        outfile.write(line)
            except FetchError as e:
                raise FetchError(str(e) + " Try setting --lazy.\n")
            finally:
                # Close the per-region file even when the fetch fails;
                # args.out and sys.stdout are left open for the caller.
                if args.split_files:
                    outfile.close()
    finally:
        # Release the FASTA handle on both the success and the error path.
        fasta.__exit__()
def write_sequence(args):
    """Write the requested regions of ``args.fasta``, optionally transformed.

    Sequence names are filtered with ``args.regex``. Regions come from
    ``split_regions(args)``; when it returns none, every key of the filtered
    FASTA file is used. With ``args.invert_match`` the selection is flipped:
    the names selected so far are excluded and every other key of the
    unfiltered file is processed instead (parsed with ``ucsc_split``).
    Regions whose length falls outside ``args.size_range`` (inclusive
    bounds) are skipped.

    Output goes to a per-region file when ``args.split_files`` is set, else
    to ``args.out`` when given, else to ``sys.stdout``. With
    ``args.transform`` set, the result of ``transform_sequence`` is written
    instead of the lines from ``fetch_sequence``.

    Args:
        args: Parsed command-line namespace. Attributes read directly here:
            ``fasta``, ``regex``, ``default_seq``, ``lazy``, ``delimiter``,
            ``no_rebuild``, ``invert_match``, ``size_range``,
            ``split_files``, ``out`` and ``transform``.

    Raises:
        FetchError: Re-raised with a hint to try ``--lazy`` when fetching a
            region fails.
    """
    _, ext = os.path.splitext(args.fasta)
    if ext:
        ext = ext[1:]  # remove the dot from extension

    filt_function = re.compile(args.regex).search
    fasta = Fasta(args.fasta,
                  default_seq=args.default_seq,
                  strict_bounds=not args.lazy,
                  split_char=args.delimiter,
                  filt_function=filt_function,
                  rebuild=not args.no_rebuild)
    try:
        regions_to_fetch, split_function = split_regions(args)
        if not regions_to_fetch:
            regions_to_fetch = fasta.keys()

        if args.invert_match:
            # Materialize the excluded names before switching handles, since
            # regions_to_fetch may still be backed by the filtered Fasta.
            sequences_to_exclude = {
                split_function(region)[0] for region in regions_to_fetch}
            unfiltered = Fasta(args.fasta,
                               default_seq=args.default_seq,
                               strict_bounds=not args.lazy,
                               split_char=args.delimiter,
                               rebuild=not args.no_rebuild)
            # The filtered handle is no longer needed; close it rather than
            # dropping the reference while it is still open.
            fasta.__exit__()
            fasta = unfiltered
            regions_to_fetch = (key for key in fasta.keys()
                                if key not in sequences_to_exclude)
            split_function = ucsc_split

        # The nucleotide table header is emitted once per call, to whichever
        # output is current when the first region is transformed.
        header = False
        for region in regions_to_fetch:
            name, start, end = split_function(region)

            if args.size_range:
                if start is not None and end is not None:
                    sequence_len = end - start
                else:
                    sequence_len = len(fasta[name])
                if not (args.size_range[0] <= sequence_len <= args.size_range[1]):
                    continue

            if args.split_files:
                # One output file per region, named from the non-empty parts
                # and restricted to alphanumerics plus `keepcharacters`.
                filename = '.'.join(
                    str(part) for part in (name, start, end, ext) if part)
                filename = ''.join(
                    c for c in filename if c.isalnum() or c in keepcharacters)
                outfile = open(filename, 'w')
            elif args.out:
                outfile = args.out
            else:
                outfile = sys.stdout

            try:
                if args.transform:
                    if not header and args.transform == 'nucleotide':
                        outfile.write("name\tstart\tend\tA\tT\tC\tG\tN\n")
                        header = True
                    outfile.write(
                        transform_sequence(args, fasta, name, start, end))
                else:
                    for line in fetch_sequence(args, fasta, name, start, end):
                        outfile.write(line)
            except FetchError as e:
                # rstrip() drops any trailing whitespace, so the separating
                # space has to be supplied explicitly before the hint.
                raise FetchError(e.msg.rstrip() + " Try setting --lazy.\n")
            finally:
                # Close the per-region file even when the fetch fails;
                # args.out and sys.stdout are left open for the caller.
                if args.split_files:
                    outfile.close()
    finally:
        # Release the FASTA handle on both the success and the error path.
        fasta.__exit__()
def write_sequence(args):
    """Write the requested regions of ``args.fasta``, optionally transformed.

    Sequence names are filtered with ``args.regex``. Regions come from
    ``split_regions(args)``; when it returns none, every key of the filtered
    FASTA file is used. With ``args.invert_match`` the selection is flipped:
    the names selected so far are excluded and every other key of the
    unfiltered file is processed instead (parsed with ``ucsc_split``).
    Regions whose length falls outside ``args.size_range`` (inclusive
    bounds) are skipped.

    Output goes to a per-region file when ``args.split_files`` is set, else
    to ``args.out`` when given, else to ``sys.stdout``. With
    ``args.transform`` set, the result of ``transform_sequence`` is written
    instead of the lines from ``fetch_sequence``.

    Args:
        args: Parsed command-line namespace. Attributes read directly here:
            ``fasta``, ``regex``, ``default_seq``, ``lazy``, ``delimiter``,
            ``invert_match``, ``size_range``, ``split_files``, ``out`` and
            ``transform``.

    Raises:
        FetchError: Re-raised with a hint to try ``--lazy`` when fetching a
            region fails.
    """
    _, ext = os.path.splitext(args.fasta)
    if ext:
        ext = ext[1:]  # remove the dot from extension

    filt_function = re.compile(args.regex).search
    fasta = Fasta(args.fasta,
                  default_seq=args.default_seq,
                  strict_bounds=not args.lazy,
                  split_char=args.delimiter,
                  filt_function=filt_function)
    try:
        regions_to_fetch, split_function = split_regions(args)
        if not regions_to_fetch:
            regions_to_fetch = fasta.keys()

        if args.invert_match:
            # Materialize the excluded names before switching handles, since
            # regions_to_fetch may still be backed by the filtered Fasta.
            sequences_to_exclude = {
                split_function(region)[0] for region in regions_to_fetch}
            unfiltered = Fasta(args.fasta,
                               default_seq=args.default_seq,
                               strict_bounds=not args.lazy,
                               split_char=args.delimiter)
            # The filtered handle is no longer needed; close it rather than
            # dropping the reference while it is still open.
            fasta.__exit__()
            fasta = unfiltered
            regions_to_fetch = (key for key in fasta.keys()
                                if key not in sequences_to_exclude)
            split_function = ucsc_split

        # The nucleotide table header is emitted once per call, to whichever
        # output is current when the first region is transformed.
        header = False
        for region in regions_to_fetch:
            name, start, end = split_function(region)

            if args.size_range:
                if start is not None and end is not None:
                    sequence_len = end - start
                else:
                    sequence_len = len(fasta[name])
                if not (args.size_range[0] <= sequence_len <= args.size_range[1]):
                    continue

            if args.split_files:
                # One output file per region, named from the non-empty parts
                # and restricted to alphanumerics plus `keepcharacters`.
                filename = '.'.join(
                    str(part) for part in (name, start, end, ext) if part)
                filename = ''.join(
                    c for c in filename if c.isalnum() or c in keepcharacters)
                outfile = open(filename, 'w')
            elif args.out:
                outfile = args.out
            else:
                outfile = sys.stdout

            try:
                if args.transform:
                    if not header and args.transform == 'nucleotide':
                        outfile.write("name\tstart\tend\tA\tT\tC\tG\tN\n")
                        header = True
                    outfile.write(
                        transform_sequence(args, fasta, name, start, end))
                else:
                    for line in fetch_sequence(args, fasta, name, start, end):
                        outfile.write(line)
            except FetchError as e:
                # rstrip() drops any trailing whitespace, so the separating
                # space has to be supplied explicitly before the hint.
                raise FetchError(e.msg.rstrip() + " Try setting --lazy.\n")
            finally:
                # Close the per-region file even when the fetch fails;
                # args.out and sys.stdout are left open for the caller.
                if args.split_files:
                    outfile.close()
    finally:
        # Release the FASTA handle on both the success and the error path.
        fasta.__exit__()