def scaffold_dict_init(self, file, introns):
    """Load the sequence of every scaffold referenced by ``introns``.

    Args:
        file: Path handed to ``FastaFile``.
        introns: Iterable of objects exposing a ``scaffold_id`` attribute.

    Returns:
        dict mapping each distinct ``scaffold_id`` to the sequence returned
        by ``FastaFile.fetch`` for that scaffold.
    """
    scaffolds = {}
    # The context manager closes the FASTA handle even if a fetch fails.
    with FastaFile(file) as sequences_object:
        for intron in introns:
            scaffold_id = intron.scaffold_id
            # Fetch each scaffold only once, however many introns share it.
            if scaffold_id not in scaffolds:
                scaffolds[scaffold_id] = sequences_object.fetch(scaffold_id)
    return scaffolds
def extract_fasta_to_file(fasta, output_dir, mode='2D_transpose_bcolz', overwrite=False):
    """One-hot encode every record of a FASTA file and write it to ``output_dir``.

    Args:
        fasta: Path handed to ``FastaFile``.
        output_dir: Directory that receives one array per record plus
            ``metadata.json``. It is created with ``exist_ok=overwrite``.
        mode: Key into ``_array_writer`` selecting the writer to use.
        overwrite: Whether an already existing ``output_dir`` is accepted.

    Raises:
        AssertionError: If ``mode`` is not a key of ``_array_writer``.
    """
    assert mode in _array_writer
    makedirs(output_dir, exist_ok=overwrite)
    write_array = _array_writer[mode]
    file_shapes = {}
    # Close the FASTA handle as soon as all records have been written.
    with FastaFile(fasta) as fasta_file:
        for chrom, size in zip(fasta_file.references, fasta_file.lengths):
            data = np.zeros((size, NUM_SEQ_CHARS), dtype=np.float32)
            seq = fasta_file.fetch(chrom)
            # one_hot_encode_sequence is called for its effect on ``data``.
            one_hot_encode_sequence(seq, data)
            # We have the metadata shape to be the transposed shape
            file_shapes[chrom] = data.shape[::-1]
            write_array(data, os.path.join(output_dir, chrom))

    with open(os.path.join(output_dir, 'metadata.json'), 'w') as fp:
        json.dump(
            {
                'file_shapes': file_shapes,
                'type': 'array_{}'.format(mode),
                'source': fasta
            }, fp)
def split_variants_to_files(vcf_file, genome_file, bi_file, multi_file):
    """Split the wanted variants of a VCF into two VCF files.

    Variants flagged as multi-allelic by ``iter_wanted_variants`` go to
    ``multi_file``, all others to ``bi_file``. Variants flagged as
    duplicated are dropped. Every written record gets a ``GT`` format
    column with the value ``0/1``.

    Args:
        vcf_file: Path handed to ``VariantFile``.
        genome_file: Path handed to ``FastaFile``.
        bi_file: Output path for the non-multi-allelic variants.
        multi_file: Output path for the multi-allelic variants.
    """
    # Both pysam handles are closed on exit, including on error.
    with VariantFile(vcf_file) as vcf, FastaFile(genome_file) as genome:
        vcf.header.add_line('##INFO=<ID=multi,Number=0,Type=Flag,'
                            'Description="Variant with multiple allele">')
        vcf.header.add_line('##INFO=<ID=duplicated,Number=0,Type=Flag,'
                            'Description="Duplicated in position">')
        vcf.header.formats.add('GT', 1, 'String', "Genotype")
        vcf.header.add_sample("Genotype")
        header_text = str(vcf.header)

        with open(bi_file, 'wt') as outbi, open(multi_file, 'wt') as outmu:
            outbi.write(header_text)
            outmu.write(header_text)
            for multi_alleles, duplicated, record in iter_wanted_variants(
                    vcf, genome):
                # Skip duplicates before paying for the string conversion.
                if duplicated:
                    continue
                line = '\t'.join(record_to_string(record) + ['GT', '0/1'])
                target = outmu if multi_alleles else outbi
                target.write(line + '\n')
def split_variants(vcf_file, genome_file):
    """Print the wanted variants of a VCF, flagged as multi and/or duplicated.

    The augmented header and then one tab-separated line per variant are
    written to stdout. Every record gets a ``GT`` format column with the
    value ``0/1``. The flags ``multi`` and ``duplicated`` are appended to
    field 6 of the list returned by ``record_to_string``.

    Args:
        vcf_file: Path handed to ``VariantFile``.
        genome_file: Path handed to ``FastaFile``.
    """
    # Both pysam handles are closed on exit, including on error.
    with VariantFile(vcf_file) as vcf, FastaFile(genome_file) as genome:
        vcf.header.add_line('##INFO=<ID=multi,Number=0,Type=Flag,'
                            'Description="Variant with multiple allele">')
        vcf.header.add_line('##INFO=<ID=duplicated,Number=0,Type=Flag,'
                            'Description="Duplicated in position">')
        vcf.header.formats.add('GT', 1, 'String', "Genotype")
        vcf.header.add_sample("Genotype")
        print(vcf.header, end='')

        for multi_alleles, duplicated, record in iter_wanted_variants(vcf, genome):
            fields = record_to_string(record) + ['GT', '0/1']
            flags = []
            if multi_alleles:
                flags.append("multi")
            if duplicated:
                flags.append("duplicated")
            for flag in flags:
                # A ';' separator is needed only when the field already has
                # content. The previous code had this test inverted.
                # NOTE(review): assumes record_to_string leaves field 6 empty
                # (not '.') when there is nothing in it -- confirm.
                fields[6] += (";" + flag) if fields[6] else flag
            print('\t'.join(fields))
def __init__(self, ref_fa_path=None, vcf_path=None, idx_path=None, batch_size=32, bin_size=100, tie='r'):
    '''
    Open the VCF and the reference, then build and verify the variant index.

    :param str ref_fa_path: Path to indexed reference fasta
    :param str vcf_path: Path to indexed vcf
    :param str idx_path: Path to bed-file which will contain the names and
        locations of compatible variants
    :param int batch_size: Batch size
    :param int bin_size: Length of the DNA-sequences (centered on the start
        position of the variant)
    :param str tie: 'l' or 'r'. With an even bin_size, 'l' gives offset 1
        and 'r' gives offset 0; with an odd bin_size the offset is always 0
    '''
    self.vcf = VariantFile(vcf_path)
    self.ref = FastaFile(ref_fa_path)
    fai_path = ref_fa_path + '.fai'
    assert os.path.isfile(fai_path), \
        'Error: no index found for Fasta-file: {}'.format(ref_fa_path)

    self.idx_path = idx_path
    self.batch_size = batch_size
    self.bin_size = bin_size

    assert tie in ['l', 'r']
    self.tie = tie
    # Only an even-sized bin has no single centre, so only then does the
    # tie-break shift the window.
    bin_is_even = bin_size % 2 == 0
    self.offset = 1 if (bin_is_even and tie != 'r') else 0

    self.n_variants = self._initialize_index()
    self._verify_refmatch()
def initWorker(localWindowSize, fastaFile, k, N, M):
    """Store the worker configuration in module-level globals.

    Sets ``windowSize``, ``FA`` (a ``FastaFile`` opened on ``fastaFile``),
    ``kSize``, ``useN`` and ``method`` from the arguments.
    """
    global FA, windowSize, kSize, useN, method
    windowSize = localWindowSize
    # The handle is kept open on purpose: it is published as the global FA.
    FA = FastaFile(fastaFile)
    kSize, useN, method = k, N, M
def _chrom_names(fasta_file):
    """Return the reference (chromosome) names of a fasta file as a list."""
    from pysam import FastaFile

    with FastaFile(fasta_file) as handle:
        return list(handle.references)
def shotgun_library(fasta_file, mu, sigma, direction=(1, -1)):
    """Generate random fragment sequences from the records of a FASTA file.

    :param fasta_file: path to the input FASTA file (opened with FastaFile).
    :param mu: mean fragment length.
    :param sigma: stdv of fragment length.
    :param direction: tuple represention direction of output sequences with
        respect to the input sequence.

    :yields: tuples ``(fragment, reference name, start, end, strand)`` where
        strand is '+' for direction 1 and '-' otherwise; '-' fragments are
        passed through ``reverse_complement``.
    :raises ValueError: if the FASTA file holds no sequence data.

    .. note:: Could be made more efficient using buffers for random samples
        and handling cases separately.
    """
    # FastaFile.fetch is proper slow, just read everything -- and close the
    # handle once it is all in memory.
    with FastaFile(fasta_file) as fasta:
        refs = fasta.references
        seq_lens = [fasta.get_reference_length(x) for x in refs]
        sequences = {k: fasta.fetch(k) for k in refs}

    total_len = sum(seq_lens)
    if total_len == 0:
        # Without this guard an empty input divides by zero or never yields.
        raise ValueError(
            "no sequence data found in {}".format(fasta_file))
    seq_probs = [x / total_len for x in seq_lens]

    def random_buffer(probs, size=10000):
        # Endlessly yield sequence indices drawn in proportion to ``probs``,
        # sampling ``size`` of them at a time.
        while True:
            buf = []
            for x, n in zip(range(len(probs)), np.random.multinomial(size, probs)):
                buf.extend([x] * n)
            np.random.shuffle(buf)
            for x in buf:
                yield x

    seq_chooser = random_buffer(seq_probs)

    # parameters for lognormal
    mean = np.log(mu / np.sqrt(1 + sigma**2 / mu**2))
    stdv = np.sqrt(np.log(1 + sigma**2 / mu**2))

    while True:
        # choose a seq based on length
        seq_i = next(seq_chooser)
        seq = sequences[refs[seq_i]]
        seq_len = seq_lens[seq_i]

        start = np.random.randint(0, seq_len)
        frag_length = int(np.random.lognormal(mean, stdv))
        move = np.random.choice(direction)
        end = max(0, start + move * frag_length)
        start, end = sorted([start, end])

        if end - start < 2:
            # Expand a bit to ensure we grab at least one base.
            start = max(0, start - 1)
            end += 1

        # NOTE(review): ``end`` is not clamped to seq_len, so the reported
        # end can exceed the length of the fragment actually sliced.
        frag_seq = seq[start:end]
        if move == -1:
            frag_seq = reverse_complement(frag_seq)
        yield frag_seq, refs[seq_i], start, end, '+' if move == 1 else '-'
def main(args):
    """Convert a tab-separated CNV table into a VCF file.

    Reads ``args.input_path``, uses ``args.genome_ref`` as the reference
    FASTA and writes the VCF headers followed by one line per CNV row to
    ``args.output_path``.
    """
    sample_name = extract_sample_name(args.input_path)
    with open(args.input_path) as cnv_input:
        with FastaFile(args.genome_ref) as genome_ref:
            with open(args.output_path, 'w') as vcf_output:
                # The first reference name decides whether contigs carry
                # the 'chr' prefix.
                first_reference = genome_ref.references[0]
                is_full_chrom_name = first_reference.startswith('chr')
                cnv_reader = csv.DictReader(cnv_input, delimiter='\t')

                header_lines = get_vcf_headers(sample_name, genome_ref)
                vcf_output.write('\n'.join(header_lines) + '\n')

                for cnv_line in cnv_reader:
                    vcf_line = get_vcf_line(cnv_line, genome_ref,
                                            is_full_chrom_name)
                    vcf_output.write(vcf_line + '\n')
def _extract(self, intervals, out, **kwargs):
    """Fill ``out`` with the one-hot encoded sequence of each interval.

    Args:
        intervals: Sequence of objects with ``chrom``, ``start`` and
            ``stop`` attributes.
        out: Array written as ``out[index, :, :, 0]`` for the interval at
            position ``index``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The same ``out`` object, after it has been filled.
    """
    # Open the FASTA for this call only and make sure it is closed again.
    with FastaFile(self._datafile) as fasta:
        for index, interval in enumerate(intervals):
            seq = fasta.fetch(str(interval.chrom), interval.start,
                              interval.stop)
            out[index, :, :, 0] = one_hot_encode_sequence(seq)
    return out
def vcf2chain(input_file, fasta_file, strain, output_file, vcf_keep=False, passed=False, quality=False, diploid=False):
    """
    Validate the inputs of a VCF-to-chain conversion and locate the strain.

    :param input_file: VCF file; checked with g2g_fu.check_file and opened
        with TabixFile
    :param fasta_file: FASTA file path, or an already open FastaFile
    :param strain: name of the sample column to use; required
    :param output_file: chain file to write; checked for writing
    :param vcf_keep: if true, a "<input basename>.errors.vcf" discard path
        is prepared next to the output file
    :param passed: PASS filter switch (only logged in this block)
    :param quality: quality filter switch (only logged in this block)
    :param diploid: diploid switch (only logged in this block)
    :return: nothing is returned by the code in this block
    :raises G2GValueError: if no strain is given
    :raises G2GVCFError: if the strain is not a sample of the VCF
    """
    start = time.time()

    input_file = g2g_fu.check_file(input_file)
    fasta_file = g2g_fu.check_file(fasta_file)

    if not strain:
        raise G2GValueError("No strain was specified.")

    output_file = g2g_fu.check_file(output_file, 'w')
    output_file_dir = os.path.dirname(output_file)

    LOG.info("VCF FILE: {0}".format(input_file))
    LOG.info("FASTA FILE: {0}".format(fasta_file))
    LOG.info("CHAIN FILE: {0}".format(output_file))

    vcf_discard_file = None

    if vcf_keep:
        vcf_discard_file = "{0}.errors.vcf".format(os.path.basename(input_file))
        vcf_discard_file = os.path.join(output_file_dir, vcf_discard_file)
        LOG.info("VCF DISCARD FILE: {0}".format(vcf_discard_file))

    LOG.info("STRAIN: {0}".format(strain))
    LOG.info("PASS FILTER ON: {0}".format(str(passed)))
    LOG.info("QUALITY FILTER ON: {0}".format(str(quality)))
    LOG.info("DIPLOID: {0}".format(str(diploid)))

    if not isinstance(fasta_file, FastaFile):
        fasta_file = FastaFile(fasta_file)

    tb = TabixFile(input_file)
    sample_index = None

    for h in tb.header:
        if h[:6] == '#CHROM':
            # Sample names start at column 10 of the #CHROM header line.
            elems = h.split('\t')
            samples = {name: index for index, name in enumerate(elems[9:])}
            try:
                sample_index = samples[strain]
            except KeyError:
                raise G2GVCFError("Unknown strain '{0}', valid strains are: {1}".format(strain, ", ".join(samples)))
def get_chrom_sizes(fasta_file, chromosomes=None):
    """Get chromosome sizes from a fasta file.

    Args:
        fasta_file: Path handed to ``pysam.FastaFile``.
        chromosomes: Optional container of names; when given, only
            references contained in it are returned.

    Returns:
        List of ``(name, length)`` tuples in the order of the fasta file.
    """
    from pysam import FastaFile

    # Read names and lengths, then release the handle right away.
    with FastaFile(fasta_file) as fa:
        genome = list(zip(fa.references, fa.lengths))
    if chromosomes is not None:
        genome = [(name, length) for name, length in genome
                  if name in chromosomes]
    return genome
def Get_fusionGene_seq(GenesU, GenesV, geneCoordniates_dict, reference_fa=''):
    """Write the reference sequences of two gene lists as FASTA files.

    The reference is symlinked into the working directory under its base
    name (unless something with that name already exists), indexed with
    ``samtools faidx`` and opened with ``FastaFile``. The sequences go to
    ``fusion_total_index/fusiongenes_ref_U.fa`` and
    ``fusion_total_index/fusiongenes_ref_V.fa``.

    Args:
        GenesU: Gene names written to the ``_U`` file.
        GenesV: Gene names written to the ``_V`` file.
        geneCoordniates_dict: Maps a gene name to a 5-item sequence
            ``(chrom, strand, start, end, gene)``.
        reference_fa: Path of the reference FASTA file.

    Returns:
        0 on success.

    Raises:
        SystemExit: With code 1 if the reference file does not exist or a
            gene name is missing from ``geneCoordniates_dict``.
    """
    if not (os.path.exists(reference_fa) or os.path.exists(os.curdir + '/' + reference_fa)):
        print('Error: There is no reference fasta files')
        raise SystemExit(1)

    genome_name = os.path.basename(reference_fa)
    if not os.path.exists(genome_name):
        _run_ignoring_failure(['ln', '-s', reference_fa, genome_name])
    _run_ignoring_failure(['samtools', 'faidx', genome_name])

    with FastaFile(genome_name) as genome:
        _write_gene_sequences(genome, GenesU, geneCoordniates_dict,
                              'fusion_total_index/fusiongenes_ref_U.fa')
        _write_gene_sequences(genome, GenesV, geneCoordniates_dict,
                              'fusion_total_index/fusiongenes_ref_V.fa')
    return 0


def _run_ignoring_failure(command):
    """Run ``command`` (an argument list, no shell) and never raise on failure."""
    import subprocess

    # Like the os.system calls this replaces, a failing or missing tool is
    # not fatal here; opening the FASTA afterwards decides success.
    try:
        return subprocess.call(command)
    except OSError:
        return -1


def _write_gene_sequences(genome, genes, gene_coordinates, out_path):
    """Write one FASTA record per gene to ``out_path``, 60 bases per line."""
    complement = str.maketrans('ACGTacgtNn', 'TGCAtgcaNn')
    with open(out_path, 'w') as out:
        for gene in genes:
            try:
                chrom, strand, start, end, gene = gene_coordinates[gene]
            except KeyError as e:
                print('%s: Input gene name wasnot found in Gtf,Check gene names' % e)
                raise SystemExit(1)
            if strand is None:
                # No strand means no sequence can be derived. Skip the gene
                # rather than write a stale sequence under its name.
                continue
            seq = genome.fetch(reference=chrom, start=int(start), end=int(end))
            if strand != '1':
                # Any strand other than '1' gets the reverse complement.
                seq = seq.translate(complement)[::-1]
            out.write('>%s \n' % gene)
            # Slice instead of matching r'.{60}', which silently dropped
            # the final line whenever it was shorter than 60 bases.
            for offset in range(0, len(seq), 60):
                out.write('%s\n' % seq[offset:offset + 60])
def __init__(self, reference_file_path):
    """
    create fasta file object given file path to a fasta reference file

    :param reference_file_path: full path to a fasta reference file
    :raises IOError: if the file cannot be opened with FastaFile
    """
    self.fasta_file_path = reference_file_path
    try:
        self.fasta = FastaFile(self.fasta_file_path)
    except Exception as err:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate, and keep the original error as the cause.
        raise IOError("FASTA FILE READ ERROR") from err
def _chrom_sizes(fasta_file):
    """Get the chromosome sizes for a fasta file.

    Args:
        fasta_file: Path handed to ``pysam.FastaFile``.

    Returns:
        OrderedDict mapping each reference name to its length, in file order.

    Raises:
        ValueError: If the fasta file reports no references.
    """
    from pysam import FastaFile

    # The handle is closed on every path, including the error path below.
    with FastaFile(fasta_file) as fa:
        chrom_lens = OrderedDict(zip(fa.references, fa.lengths))
    if len(chrom_lens) == 0:
        # Every fragment that interpolates needs its own f prefix.
        raise ValueError(f"no chromosomes found in fasta file: {fasta_file}. "
                         "Make sure the file path is correct and that the fasta index "
                         f"file {fasta_file}.fai is up to date")
    return chrom_lens
def main():
    """Print each labelled peak region, resized to 1000 bp, and its sequence.

    Regions and labels come from ``iter_peaks_and_labels(sys.argv[1])``.
    Each region is replaced by one of exactly ``min_region_size`` base pairs
    centred on the original, then printed with its label, followed by the
    sequence fetched from ``GRCh38.genome.fa``.
    """
    min_region_size = 1000
    with FastaFile("GRCh38.genome.fa") as genome:
        for region, label in iter_peaks_and_labels(sys.argv[1]):
            # create a new region exactly min_region_size basepairs long
            # centered on region. Floor division keeps the coordinates
            # integral, as genome.fetch needs them to be.
            expanded_start = (region[1] + (region[2] - region[1]) // 2
                              - min_region_size // 2)
            expanded_stop = expanded_start + min_region_size
            region = (region[0], expanded_start, expanded_stop)
            print(region, label)
            print(genome.fetch(*region))
    return
def generate_homopolymer_plots(bed_file, fasta_file, bam_file):
    """Print reference and per-read homopolymer lengths for BED regions.

    For each BED line (contig, start, end; tab separated) one line is
    printed: contig, start, end, the reference homopolymer length and a
    comma-separated list of homopolymer lengths observed in the reads.
    Regions starting before position 1000 or longer than 50 are skipped.

    Args:
        bed_file: Path of the tab-separated BED file.
        fasta_file: Path handed to ``FastaFile``.
        bam_file: Path handed to ``pysam.AlignmentFile``.
    """
    # Open each input once for the whole run, not once per BED line, and
    # close all three on exit.
    with open(bed_file, 'r') as bed_file_records, \
            pysam.AlignmentFile(bam_file, "rb") as samfile, \
            FastaFile(fasta_file) as assembly_fasta_file:
        for line in bed_file_records:
            contig, start_pos, end_pos = line.rstrip().split('\t')
            start_pos = int(start_pos)
            end_pos = int(end_pos)

            if start_pos < 1000:
                continue
            if end_pos - start_pos > 50:
                continue

            reference_sequence = assembly_fasta_file.fetch(
                reference=contig, start=start_pos, end=start_pos + 200)

            # The reference run is measured from index 1 of the fetched
            # window, i.e. the base right after start_pos.
            reference_homopolymer_index_start = 1
            reference_homopolymer_index_end = 1
            homopolymer_base = reference_sequence[reference_homopolymer_index_start]
            while (reference_homopolymer_index_end < len(reference_sequence)
                   and reference_sequence[reference_homopolymer_index_end] == homopolymer_base):
                reference_homopolymer_index_end += 1
            reference_homopolymer_length = (reference_homopolymer_index_end
                                            - reference_homopolymer_index_start)

            read_homopolymers = []
            for read in samfile.fetch(contig, start_pos - 1, end_pos):
                aligned_pairs = read.get_aligned_pairs()
                # Find the query index just after the base aligned to
                # start_pos. It stays 0 when no such base exists.
                start_index = 0
                for index, position in aligned_pairs:
                    if index is None:
                        continue
                    if position == start_pos:
                        start_index = index + 1
                        break

                if read.query_sequence is None:
                    continue
                if start_index == len(read.query_sequence):
                    continue

                homopolymer_base = read.query_sequence[start_index]
                end_index = start_index
                while (end_index < len(read.query_sequence)
                       and read.query_sequence[end_index] == homopolymer_base):
                    end_index += 1
                read_homopolymers.append(end_index - start_index)

            print(contig + "\t" + str(start_pos) + "\t" + str(end_pos) + "\t"
                  + str(reference_homopolymer_length) + "\t"
                  + str(','.join([str(x) for x in read_homopolymers])))
def close(self):
    """Finish the file: close, bgzip, index and make everything read-only.

    Does nothing when there is no open handle, so repeated calls are
    harmless.
    """
    if not self._fh:
        return

    self._fh.close()
    self._fh = None
    subprocess.check_call([self._bgzip_exe, "--force", self._basepath])
    os.rename(self._basepath + ".gz", self.filename)

    # open file with FastaFile to create indexes, then make all read-only
    FastaFile(self.filename).close()
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    for suffix in ("", ".fai", ".gzi"):
        os.chmod(self.filename + suffix, read_only)

    logger.info("{} written; added {} sequences".format(self.filename, len(self._added)))
def __init__(self, datafile, use_strand=False, **kwargs):
    """Fasta file extractor

    NOTE: The extractor is not thread-safe.
    If you wish to use it with multiprocessing,
    create a new extractor object in each process.

    Args:
      datafile (str): path to the fasta file
      use_strand (bool): if True, the extracted sequence
        is reverse complemented in case interval.strand == "-"
      **kwargs: forwarded unchanged to the parent constructor
    """
    super(FastaExtractor, self).__init__(datafile, **kwargs)
    self.use_strand = use_strand
    # self._datafile is not assigned in this method; presumably the parent
    # constructor sets it from datafile -- TODO confirm.
    self.fasta = FastaFile(self._datafile)
def countContexts(fastaFilePath, whiteListBed=None, blackListBed=None):
    """Count di- and tri-nucleotide contexts in a FASTA file.

    Args:
        fastaFilePath: Path handed to ``FastaFile``.
        whiteListBed: Optional BED file restricting the counted regions;
            without it every reference is counted over its full length.
        blackListBed: Optional BED file whose regions are subtracted from
            the whitelist.

    Returns:
        Tuple ``(diNucCounts, triNucCounts)`` of ``defaultdict(int)``
        objects keyed by the sequence slices as fetched (case is not
        changed).
    """
    debug(f"Starting to count contexts of nucleotides in {fastaFilePath}")
    triNucCounts = defaultdict(int)
    diNucCounts = defaultdict(int)

    with FastaFile(fastaFilePath) as fastaFile:
        # if we do not have a whitelist to start out, we make one from the
        # fasta, which includes everything
        if whiteListBed is None:
            wlObj = from_dict(
                {
                    "Chromosome": fastaFile.references,
                    # Ranges go straight into fetch(start, end), which is
                    # 0-based: starting at 1 skipped the first base.
                    "Start": [0] * fastaFile.nreferences,
                    "End": fastaFile.lengths,
                }
            )
        else:
            # we cast this to string, because pyranges wants string and we
            # use the Path type
            wlObj = read_bed(str(whiteListBed))
            wlObj = wlObj.merge()

        # if we have a blacklist, we subtract that from the whitelist,
        # otherwise we leave it how it is
        if blackListBed is not None:
            blObj = read_bed(str(blackListBed))
            blObj = blObj.merge()
            # no merge needed afterwards: subtracting only removes ranges
            wlObj = wlObj.subtract(blObj)

        # while we could use the get_fasta function from pyranges, it needs
        # another dependency (pyfaidx) and is slower (from preliminary
        # testing), so iterate over all chromosomes and their ranges
        for chrom, df in wlObj:
            # iterrows has to return the index, even though we dont use it
            for _, region in df.iterrows():
                seq = fastaFile.fetch(
                    reference=chrom, start=region["Start"], end=region["End"]
                )
                # NOTE(review): this range stops before the last
                # dinucleotide of each region -- confirm that is intended.
                for i in range(len(seq) - 2):
                    diNucCounts[seq[i : i + 2]] += 1
                    triNucCounts[seq[i : i + 3]] += 1
            debug(f"contect frequency analysis complete for chromsome {chrom}")

    return (diNucCounts, triNucCounts)
def data_generator_pysam(my_args, name, start, stop, is_bulk):
    """Yield one pileup row per reference position in ``[start, stop)``.

    Args:
        my_args: Object with ``fasta``, ``bam`` and ``bulk`` path attributes.
        name: Contig name.
        start: First reference position to report.
        stop: Position at which reporting stops (exclusive).
        is_bulk: If true, read ``my_args.bulk`` with minimum mapping quality
            0; otherwise read ``my_args.bam`` with minimum mapping quality 20.

    Yields:
        Lists ``[name, pos, reference base, depth as str, read bases,
        base qualities, mapping qualities]``; the last three are ``'*'``
        when no read is aligned. A final ``None`` marks the end of the data.
    """
    # Both handles are closed when the generator finishes or is discarded.
    with FastaFile(my_args.fasta) as fasta_file:
        ref = fasta_file.fetch(name, start, stop)
        my_arg = {
            'fastafile': fasta_file,
            'stepper': 'samtools',
            'adjust_capq_threshold': 50,
            'contig': name,
            'start': start,
            'stop': stop,
            'min_mapping_quality': 0 if is_bulk else 20,
            'min_base_quality': 13,
        }
        bam_path = my_args.bulk if is_bulk else my_args.bam
        with AlignmentFile(bam_path, 'rb') as bam_file:
            for pileup_column in bam_file.pileup(**my_arg):
                pos = pileup_column.reference_pos
                if pos >= stop:
                    break
                if pos < start:
                    continue
                read_bases_list = pileup_column.get_query_sequences(
                    mark_matches=True, mark_ends=True, add_indels=True)
                read_bases = ''.join(read_bases_list).upper()
                n = pileup_column.get_num_aligned()
                if n == 0:
                    read_bases = '*'
                    base_q = '*'
                    map_q = '*'
                else:
                    # Qualities are encoded as characters offset by
                    # PHREDSCORE.
                    base_q = ''.join([chr(int(i) + PHREDSCORE)
                                      for i in pileup_column.get_query_qualities()])
                    map_q = ''.join([chr(int(i) + PHREDSCORE)
                                     for i in pileup_column.get_mapping_qualities()])
                yield [name, pos, ref[pos - start], str(n), read_bases, base_q, map_q]
    yield None
def main():
    """Print the 1000 bp sequence and label of every training peak.

    Every file in ``./train_data/`` is read with ``iter_peaks_and_labels``.
    Each region is replaced by one of exactly ``min_region_size`` base pairs
    centred on the original, and its sequence from
    ``./genome/GRCh38.genome.fa`` is printed together with the label.
    """
    min_region_size = 1000
    train_path = "./train_data/"
    with FastaFile("./genome/GRCh38.genome.fa") as genome:
        for filename in os.listdir(train_path):
            for region, label in iter_peaks_and_labels(train_path + filename):
                # create a new region exactly min_region_size basepairs long
                # centered on region. Floor division keeps the coordinates
                # integral, as genome.fetch needs them to be.
                expanded_start = (region[1] + (region[2] - region[1]) // 2
                                  - min_region_size // 2)
                expanded_stop = expanded_start + min_region_size
                region = (region[0], expanded_start, expanded_stop)
                print(genome.fetch(*region), label)
    return
def method2(basefl):
    """Write a window around every feature position, then run the aligner.

    Features are read from ``FLAGS.input + ".feature.tsv"``; the first
    column is split on ``|`` and must yield an id and a position once the
    last item is dropped. The sequence from ``pos - 30`` to ``pos + 30`` is
    fetched from ``FLAGS.input + ".feature.fa"`` and written to
    ``<basefl>/20bp.fa``. Features that fail are reported on stdout and
    skipped. ``align_hisat2()`` is called after the output file is closed.

    Args:
        basefl: Directory that receives ``20bp.fa``.
    """
    fa = FLAGS.input + ".feature.fa"
    fl1 = FLAGS.input + ".feature.tsv"
    # All three files are closed before the aligner starts.
    with FastaFile(fa) as loader, \
            open("%s/20bp.fa" % (basefl), "w") as output, \
            open(fl1, "r") as features:
        for line in features:
            ele = line.rstrip().split()
            ids, pos = ele[0].split("|")[:-1]
            pos = int(pos)
            try:
                seq = loader.fetch(ids, pos - 30, pos + 30)
                output.write(">%s|%s\n%s\n" % (ids, pos, seq))
            except Exception:
                # Best effort per feature, but unlike a bare except this
                # lets KeyboardInterrupt and SystemExit through.
                print("ids %s %s,error" % (ids, pos))
    align_hisat2()
def extract_fasta_to_file(fasta, output_dir, overwrite):
    """Create or load the compressed one-hot store of a fasta file.

    Args:
        fasta: fasta file to be converted
        output_dir: output directory for memory map location
        overwrite: boolean - whether to overwrite current memory map

    Returns:
        The metadata dict. It is read back from ``metadata.json`` whenever
        that file can be opened, and otherwise is the dict that was just
        written.
    """
    metadata_path = os.path.join(output_dir, 'metadata.json')
    # At most two passes: either write then read back, or fail to read and
    # then write.
    for _ in [0, 1]:
        if overwrite:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            file_shapes = {}
            # Close the FASTA handle once every record is stored.
            with FastaFile(fasta) as fasta_file:
                for chrom in fasta_file.references:
                    seq = fasta_file.fetch(chrom)
                    data = one_hot_encode_sequence(seq)
                    file_shapes[chrom] = data.shape
                    bcolz.carray(data,
                                 rootdir=os.path.join(output_dir, chrom),
                                 cparams=_blosc_params,
                                 mode='w').flush()
            mode = '2D_transpose_bcolz'
            metadata = {
                'file_shapes': file_shapes,
                'type': 'array_{}'.format(mode),
                'extractor': 'CompressedFastaExtractor',
                'source': fasta
            }
            with open(metadata_path, 'w') as fp:
                json.dump(metadata, fp)
            overwrite = False
        else:
            try:
                with open(metadata_path, 'r') as fp:
                    metadata = json.load(fp)
                break
            except IOError as e:
                print("I/O error({0}): {1} for {2}".format(
                    e.errno, e.strerror, output_dir))
                print(
                    "There is a problem with opening the metadata. Recreating the mmap files and overwriting..."
                )
                overwrite = True
    return metadata
def get_contig_list_from_fasta(fasta_path, with_length=False):
    """Obtain list of contigs from a fasta file,
    all alternative contigs are pooled into the string MISC_ALT_CONTIGS_SCMO

    Args:
        fasta_path (str or pysam.FastaFile) : Path or handle to fasta file

        with_length(bool): return list of lengths

    Returns:
        contig_list (list ) : List of contigs + ['MISC_ALT_CONTIGS_SCMO'] if
            any alt contig is present in the fasta file. With
            ``with_length`` a tuple ``(contig_list, lens)`` is returned,
            where the pooled entry has length ``None``.

    Raises:
        TypeError: If ``fasta_path`` is neither a str nor a FastaFile.
    """
    # isinstance also accepts subclasses of str and FastaFile.
    opened_here = isinstance(fasta_path, str)
    if opened_here:
        fa = FastaFile(fasta_path)
    elif isinstance(fasta_path, FastaFile):
        fa = fasta_path
    else:
        raise TypeError('Supply pysam.FastaFile or str')

    contig_list = []
    lens = []
    has_alt = False
    try:
        for reference, length in zip(fa.references, fa.lengths):
            if is_main_chromosome(reference):
                contig_list.append(reference)
                lens.append(length)
            else:
                has_alt = True
    finally:
        # Close handle if we just opened one; a caller's handle stays open.
        if opened_here:
            fa.close()

    if has_alt:
        contig_list.append('MISC_ALT_CONTIGS_SCMO')
        lens.append(None)

    if with_length:
        return contig_list, lens
    return contig_list
def extract_fasta_to_npy(fasta, output_dir):
    """One-hot encode every record of a FASTA file into ``.npy`` files.

    One ``<reference>.npy`` array of shape ``(NUM_SEQ_CHARS, length)`` is
    saved per record, and ``metadata.json`` lists the shapes.

    Args:
        fasta: Path handed to ``FastaFile``.
        output_dir: Existing directory that receives the output files.
    """
    file_shapes = {}
    # Close the FASTA handle as soon as all records have been saved.
    with FastaFile(fasta) as fasta_file:
        for chrom, size in zip(fasta_file.references, fasta_file.lengths):
            # NOTE(review): the buffer is uninitialised; this relies on
            # one_hot_encode_sequence writing every cell -- confirm.
            data = np.empty((NUM_SEQ_CHARS, size), dtype=np.float32)
            seq = fasta_file.fetch(chrom)
            one_hot_encode_sequence(seq, data)
            np.save('{}.npy'.format(os.path.join(output_dir, chrom)), data)
            file_shapes[chrom] = data.shape

    with open(os.path.join(output_dir, 'metadata.json'), 'w') as fp:
        json.dump(
            {
                'file_shapes': file_shapes,
                'type': 'array',
                'source': fasta
            }, fp)
def __init__(self, reference_file_path):
    """
    Open the fasta reference file at the given path.

    The path is stored as ``self.fasta_file_path`` and the open handle as
    ``self.fasta``.

    :param reference_file_path: full path to a fasta reference file
    :raises AssertionError: if the path does not exist
    :raises IOError: if FastaFile cannot open the file; the underlying
        error is printed first
    """
    self.fasta_file_path = reference_file_path
    assert os.path.exists(reference_file_path), \
        "Reference path does not exist: {}".format(reference_file_path)
    try:
        handle = FastaFile(self.fasta_file_path)
    except Exception as e:
        print(e)
        raise IOError(
            "Fasta File Read Error: Try indexing reference with 'samtools faidx {}'"
            .format(reference_file_path))
    else:
        self.fasta = handle
def __init__(self, fasta_file, noTEMD=False, h5_file='tmp_vote.h5', stranded=False):
    """Record the settings, read the chromosome lengths and open the HDF5 file.

    Args:
        fasta_file: Path handed to ``FastaFile``; only read for the
            reference names and lengths.
        noTEMD: Stored as ``self.noTEMD``.
        h5_file: Path of the HDF5 file, opened in append mode.
        stranded: Stored as ``self.stranded``.
    """
    self.fasta_file = fasta_file
    self.noTEMD = noTEMD
    self.stranded = stranded

    # Reference name -> length; the FASTA handle is closed right after.
    chrom_lengths = {}
    with FastaFile(fasta_file) as FA:
        for chrom in FA.references:
            chrom_lengths[chrom] = FA.get_reference_length(chrom)
    self.chrom_dict = chrom_lengths

    self.cur_chrom = ''
    self.h5_file = h5_file
    self.H5 = h5py.File(h5_file, 'a')
    self._genome_init()
def extract_seq(interval, variant, fasta_file, one_hot=False):
    """Extract the sequence of an interval, optionally carrying a variant.

    Note: in case the variant is an indel, the anchorpoint at the beginning
      is used

    Args:
      interval: pybedtools.Interval where to extract the sequence from
      variant: Variant class with attributes: chr, pos, ref, alt; may be
        None, in which case the plain reference sequence is returned
      fasta_file: file path or pysam.FastaFile instance
      one_hot: if True, one-hot-encode the output sequence

    Returns:
      sequence of length ``interval.stop - interval.start``

    Raises:
      ValueError: if the reference at the variant position differs from
        ``variant.ref``
    """
    opened_here = isinstance(fasta_file, str)
    if opened_here:
        from pysam import FastaFile
        fasta_file = FastaFile(fasta_file)

    if variant is not None and variant.pos - 1 >= interval.start and variant.pos <= interval.stop:
        inside = True
        lendiff = len(variant.alt) - len(variant.ref)
    else:
        inside = False
        lendiff = 0

    try:
        # Fetch fewer (insertion) or more (deletion) reference bases so the
        # mutated sequence ends up with the interval's length.
        seq = fasta_file.fetch(str(interval.chrom), interval.start,
                               interval.stop - lendiff)
    finally:
        # Only close a handle opened here; a caller's handle stays open.
        if opened_here:
            fasta_file.close()

    if not inside:
        out = seq
    else:
        # now, mutate the sequence
        pos = variant.pos - interval.start - 1
        expect_ref = seq[pos:(pos + len(variant.ref))]
        if expect_ref != variant.ref:
            raise ValueError(
                f"Expected reference: {expect_ref}, observed reference: {variant.ref}"
            )
        # Anchor at the beginning
        out = seq[:pos] + variant.alt + seq[(pos + len(variant.ref)):]

    # sequence length has to be correct at the end
    assert len(out) == interval.stop - interval.start
    if one_hot:
        out = encodeDNA([out.upper()])[0]
    return out
def generate_header(reference_fa: str, tag: str) -> VariantHeader:
    """
    Build the minimal VCF header: one filter plus the reference contigs.

    :param reference_fa: Path to reference fasta file.
    :param tag: The filter tag to use.
    :return: Header with the filter ``tag`` and one contig entry, including
        its length, per reference of the fasta file.
    """
    header = VariantHeader()
    header.filters.add(tag, None, None, "Failed dToxoG")
    # The fasta handle is closed on exit, including on error.
    with FastaFile(reference_fa) as fasta:
        for contig in fasta.references:
            contig_length = fasta.get_reference_length(contig)
            header.contigs.add(contig, length=contig_length)
    return header