Ejemplos de FASTA.FASTA en Python, ejemplos de fasta.FASTA.FASTA en Python

Ejemplo n.º 1

0

Mostrar archivo

 def __init__(self, fwd, rev, parent=None):
     """Bundle two FASTA files into one paired object.

     :param fwd: Passed to ``FASTA()`` and stored as ``self.fwd``
                 (presumably the forward reads -- confirm with caller).
     :param rev: Passed to ``FASTA()`` and stored as ``self.rev``
                 (presumably the reverse reads -- confirm with caller).
     :param parent: Optional owner object, stored untouched.
     """
     # Wrap both inputs #
     forward = FASTA(fwd)
     self.fwd = forward
     reverse = FASTA(rev)
     self.rev = reverse
     # The compression flag of the pair mirrors the one of the first file #
     self.gzipped = forward.gzipped
     # Back-reference #
     self.parent = parent

Ejemplo n.º 2

0

Mostrar archivo

def generate_values(path, progress=False):
    """Yield one ``(id, description, sequence)`` tuple per record of a file.

    :param path: File handed to ``SeqIO.parse`` in ``'fasta'`` mode.
    :param progress: When true, the records are iterated through ``tqdm``,
                     using ``len(FASTA(path))`` as the reported length.
    """
    records = SeqIO.parse(path, 'fasta')
    if progress:
        # GenWithLength pairs the parser iterator with an explicit length,
        # presumably so that tqdm can display a total -- confirm
        records = tqdm(GenWithLength(records, len(FASTA(path))))
    for record in records:
        yield (record.id, record.description, str(record.seq))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: hmmer.py Proyecto: xapple/seqsearch

 def __init__(
     self,
     query_path,  # The input sequences
     db_path=pfam.hmm_db,  # The database to search
     seq_type='prot' or 'nucl',  # The seq type of the query_path file
     e_value=0.001,  # The search threshold
     params=None,  # Add extra params for the command line
     out_path=None,  # Where the results will be dropped
     executable=None,  # If you want a specific binary give the path
     cpus=None):  # The number of threads to use
     """Record every setting needed for a search; nothing is run here.

     Notes on the defaults:

     * ``'prot' or 'nucl'`` evaluates to ``'prot'``; the second string only
       documents the other accepted value.
     * ``db_path`` may be one of the short names ``'pfam'`` or ``'tigrfam'``,
       in which case the matching ``hmm_db`` object replaces it.
     * ``out_path`` set to None places the result next to the query with a
       ``.hmmout`` suffix; a value ending in ``/`` is treated as a directory
       and the query prefix plus ``.hmmout`` is appended to it.
     * ``cpus`` set to None uses every core reported, capped at 32.
     """
     # Inputs #
     self.query = FASTA(query_path)
     self.db = FilePath(db_path)
     # Plain settings (a missing or empty `params` becomes a fresh dict) #
     self.params = params or {}
     self.e_value = e_value
     self.seq_type = seq_type
     self.executable = FilePath(executable)
     # Thread count #
     self.cpus = min(multiprocessing.cpu_count(), 32) if cpus is None else cpus
     # Short database names are swapped for the real database object #
     if db_path == 'pfam':
         self.db = pfam.hmm_db
     if db_path == 'tigrfam':
         self.db = tigrfam.hmm_db
     # Work out where the results go, then wrap the location once #
     if out_path is None:
         destination = self.query.prefix_path + '.hmmout'
     elif out_path.endswith('/'):
         destination = out_path + self.query.prefix + '.hmmout'
     else:
         destination = out_path
     self.out_path = FilePath(destination)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: fastq.py Proyecto: nataszczypiora/algorytmy_sekwencjonowania

 def to_fasta(self, path, verbose=False):
     """Write every record of this object to `path` in FASTA format.

     :param path: Destination file, opened for writing (truncated).
     :param verbose: When true, iteration goes through a ``tqdm`` progress bar.
     :returns: A ``FASTA`` object built from `path`.
     """
     # Imported here rather than at module level, as in the original #
     import tqdm
     with open(path, 'w') as handle:
         # Only wrap the iteration when a progress bar was asked for #
         records = tqdm.tqdm(self) if verbose else self
         for record in records:
             SeqIO.write(record, handle, 'fasta')
     return FASTA(path)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: duplications.py Proyecto: xapple/ld12

 def fresh_fasta(self):
     """A file containing all the fresh water genes.

     The file is built the first time it is requested: every genome whose
     ``fresh`` attribute is true is decompressed with ``gunzip -c`` and the
     output is concatenated into the destination. If the file already exists
     it is returned as is.

     :returns: The ``FASTA`` object wrapping ``self.p.fresh_fasta``.
     :raises AssertionError: If the number of sequences in the result differs
                             from the sum over the source genomes.
     """
     fasta = FASTA(self.p.fresh_fasta)
     if not fasta.exists:
         # Single-argument call form: same output on Python 2 and Python 3,
         # whereas the bare print statement is a SyntaxError on Python 3
         print("Building fasta file with all fresh genes...")
         fresh = [g for g in genomes.values() if g.fresh]
         # The genomes are joined straight into the command line, so they are
         # presumably str-like paths to gzipped files -- confirm with caller
         shell_output('gunzip -c %s > %s' % (' '.join(fresh), fasta))
         # Sanity check: no sequence was lost in the concatenation
         assert len(fasta) == sum(map(len, fresh))
         self.timer.print_elapsed()
     return fasta

Ejemplo n.º 6

0

Mostrar archivo

Archivo: cluster.py Proyecto: xapple/ld12

 def fasta(self):
     """The fasta file holding the filtered genes of this cluster.

     Each entry is named after ``gene.name``. The file is only written when
     the ``FASTA`` object evaluates as false; otherwise it is returned as is.
     """
     fasta = FASTA(self.p.fasta)
     # Nothing to do when the file already has content #
     if fasta:
         return fasta
     # Otherwise write one entry per filtered gene #
     fasta.create()
     for gene in self.filtered_genes:
         fasta.add_str(str(gene), name=gene.name)
     fasta.close()
     return fasta

Ejemplo n.º 7

0

Mostrar archivo

 def __init__(self, version, seq_type, base_dir=None):
     """Set up names and locations for one release of the database.

     :param version: Release identifier; appended to ``short_name`` and
                     interpolated into the URL and file names.
     :param seq_type: Stored as is on the instance.
     :param base_dir: Root under which ``databases/<short_name>/`` is placed;
                      the module-level ``home`` is used when None.
     """
     # Attributes #
     self.version, self.seq_type = version, seq_type
     self.short_name = self.short_name + "_" + self.version
     # Base directory #
     root = home if base_dir is None else base_dir
     self.base_dir = root + 'databases/' + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # URL #
     self.url = "release_%s/Exports/" % self.version
     # Each archive gets two objects: the '.gz' file as named, and the same
     # path with the three trailing '.gz' characters removed #
     gz = len('.gz')
     # The database #
     self.nr99_name = "SILVA_%s_SSURef_Nr99_tax_silva.fasta.gz" % self.version
     self.nr99_dest = FASTA(self.base_dir + self.nr99_name)
     self.nr99 = FASTA(self.base_dir + self.nr99_name[:-gz])
     # The alignment #
     self.aligned_name = "SILVA_%s_SSURef_Nr99_tax_silva_full_align_trunc.fasta.gz" % self.version
     self.aligned_dest = FASTA(self.base_dir + self.aligned_name)
     self.aligned = FASTA(self.base_dir + self.aligned_name[:-gz])

Ejemplo n.º 8

0

Mostrar archivo

 def fasta(self):
     """The fasta file with all uniprot proteins related to this family.

     Built on first use by copying every sequence of ``pfam.fasta`` whose
     description contains ``self.fam_name``; an existing file is reused.
     """
     fasta = FASTA(self.p.proteins)
     # Reuse the file when it is already on disk #
     if fasta.exists:
         return fasta
     # Otherwise filter the full database by family name #
     fasta.create()
     related = (seq for seq in pfam.fasta if self.fam_name in seq.description)
     for seq in related:
         fasta.add_seq(seq)
     fasta.close()
     # The freshly written file must not evaluate as false #
     assert fasta
     return fasta

Ejemplo n.º 9

0

Mostrar archivo

    def read_file(self, fp):
        '''
        Read the first FASTA record from the content of fp,
        and set the chromosome name and sequence using set_chromosome method.

        The first character of the record name is dropped before it is
        stored (presumably the leading '>' of the header line -- confirm
        against FASTA.get_record).

        Raises NoChromosomeFoundError when both the name and the sequence are
        empty, and ChromosomeFASTAFromatError when only one of them is.
        '''
        if self.verbose:
            # Written directly to the stream: the former `print >> stderr`
            # form raises TypeError on Python 3, while write() behaves the
            # same on Python 2 and 3
            stderr.write("reading a FASTA record to set a chromosome\n")
        fasta = FASTA(fp=fp, verbose=self.verbose)
        chr_name, chr_seq = fasta.get_record()

        if chr_name and chr_seq:
            chr_name = chr_name[1:]
            self.set_chromosome(chr_name, chr_seq)
        elif not chr_name and not chr_seq:
            # Nothing at all could be read
            raise NoChromosomeFoundError(fp.name, chr_name, chr_seq)
        else:
            # Exactly one of name/sequence is missing
            raise ChromosomeFASTAFromatError(fp.name, chr_name, chr_seq)

Ejemplo n.º 10

0

Mostrar archivo

def main():
    """Print encoded sequences or dihedral labels for the selected PDB ids.

    For every id returned by ``read_datafile(args.t)`` that also has an
    entry in the dihedrals table, one line is printed:

    * ``args.label == 'x'``: the one-hot encoded sequence, which is the
      peptide alone when ``args.pep`` is set, otherwise the universal groove
      followed by the peptide;
    * ``args.label == 'y'``: the dihedral values of that id.

    Each id that produced a line is also appended to the file ``args.pdbids``.
    Any other value of ``args.label`` prints and records nothing.
    """
    args = parse_args()
    dihedrals = read_dihedrals()

    fasta = FASTA(args.fasta)
    fasta.read()
    (peptides, mhcSeq, mhcAllele) = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    # NOTE(review): the next two results are never read in this function.
    # The calls are kept because they may validate their inputs -- confirm
    # before removing them.
    intersectGrooves = IntersectionGroove(args.grooves, mhcSeq, peptides)
    labels = read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    # The context manager closes the output file even if encoding raises
    with open(args.pdbids, 'w') as outputfilehandler:
        for pdbid in pdbids:
            if pdbid not in dihedrals:
                continue

            if args.pep:
                sequence = peptides[pdbid]
            else:
                sequence = universalGrooves[pdbid] + peptides[pdbid]
            finalSeqCode = oneHotEncoding(sequence)
            finalLabelCode = dihedrals[pdbid]

            if args.label == 'x':
                print(', '.join(finalSeqCode))
            elif args.label == 'y':
                print(', '.join(finalLabelCode))
            else:
                # Unknown label kind: nothing printed, so nothing recorded
                continue
            outputfilehandler.write(pdbid + '\n')

Ejemplo n.º 11

0

Mostrar archivo

def read_fasta(args):
    """Collect the chain id and sequence of every 9-residue entry of a FASTA file.

    Headers are split on ``'|'``; the first field is used as the PDB id and
    the second as the chain id. Entries whose sequence length is not 9 are
    ignored. When an id occurs several times, the last matching entry wins.

    :param args: Object exposing the FASTA location as ``args.fasta``.
    :returns: ``(pep_chain, pep_seq)``, two dicts keyed by PDB id.
    """
    fasta = FASTA(args.fasta)
    fasta.read()

    chain_by_pdbid, seq_by_pdbid = {}, {}
    for header in fasta.get_headers():
        parts = header.split('|')
        pdbid, chainid = parts[0], parts[1]
        sequence = fasta.get_sequence(header)

        if len(sequence) != 9:
            continue
        chain_by_pdbid[pdbid] = chainid
        seq_by_pdbid[pdbid] = sequence

    return (chain_by_pdbid, seq_by_pdbid)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: core.py Proyecto: xapple/seqsearch

 def __init__(self,
              query_path,
              db_path,
              seq_type     = 'prot' or 'nucl',     # The seq type of the query_path file
              params       = None,                 # Add extra params for the command line
              algorithm    = "blastn" or "blastp", # Will be auto-determined with seq_type
              out_path     = None,                 # Where the results will be dropped
              executable   = None,                 # If you want a specific binary give the path
              cpus         = None,                 # The number of threads to use
              num          = None,                 # When parallelized, the number of this thread
              _out         = None,                 # Store the stdout at this path
              _err         = None):                # Store the stderr at this path
     """Record every setting needed for a search; nothing is run here.

     Notes on the defaults:

     * ``'prot' or 'nucl'`` evaluates to ``'prot'`` and
       ``"blastn" or "blastp"`` to ``"blastn"``; the second string of each
       pair only documents the other accepted value.
     * ``out_path`` set to None places the result next to the query, using
       ``self.extension``; a value ending in ``/`` is treated as a directory
       and the query prefix plus ``self.extension`` is appended to it.
     * ``cpus`` set to None uses every core reported, capped at 32.
     * ``_out`` / ``_err`` set to exactly ``True`` are replaced by the output
       path with ``.stdout`` / ``.stderr`` appended.
     """
     # Main input and the database to search against #
     self.query = FASTA(query_path)
     self.db = FilePath(db_path)
     # Other attributes (a missing or empty `params` becomes a fresh dict) #
     self.seq_type, self.algorithm, self.num = seq_type, algorithm, num
     self.params = params or {}
     # The standard output and error #
     self._out, self._err = _out, _err
     # Work out where the results go, then wrap the location once #
     if out_path is None:
         destination = self.query.prefix_path + self.extension
     elif out_path.endswith('/'):
         destination = out_path + self.query.prefix + self.extension
     else:
         destination = out_path
     self.out_path = FilePath(destination)
     # Executable #
     self.executable = FilePath(executable)
     # Cores to use #
     self.cpus = min(multiprocessing.cpu_count(), 32) if cpus is None else cpus
     # Derive the stream capture paths when only a flag was given #
     if self._out is True:
         self._out = self.out_path + '.stdout'
     if self._err is True:
         self._err = self.out_path + '.stderr'

Ejemplo n.º 13

0

Mostrar archivo

def main():
    """Print encoded inputs or labels for every labelled pair of PDB ids.

    A label key has the form ``<pdbid1>_<pdbid2>``. A pair is processed when
    at least one of its two ids is listed in the file ``args.t``. The text
    that gets encoded is ``left|right``, where each side is the peptide
    alone (``args.pep`` set) or the universal groove followed by the peptide.
    ``args.label`` selects what is printed: ``'x'`` for the encoded
    sequence, ``'y'`` for the encoded label; any other value prints nothing.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    (peptides, mhcSeq, mhcAllele) = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    intersectGrooves = IntersectionGroove(args.grooves, mhcSeq, peptides)
    labels = read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    # Two AAindex records, looked up by accession and handed to the encoder
    aaindex = Aaindex()
    index_data = aaindex.get('FASG890101').index_data
    charge_data = aaindex.get('KLEP840101').index_data

    for pair in labels:
        (pdbid1, pdbid2) = pair.split('_')
        # One selected member is enough for the pair to be kept
        if pdbid1 not in pdbids and pdbid2 not in pdbids:
            continue

        if args.pep:
            text = peptides[pdbid1]+'|'+peptides[pdbid2]
        else:
            text = universalGrooves[pdbid1]+peptides[pdbid1]+'|'+universalGrooves[pdbid2]+peptides[pdbid2]
        finalSeqCode, finalLabelCode = oneHotEncoding(text, labels[pair], index_data, charge_data)

        if args.label == 'x':
            print (', '.join(finalSeqCode))
        elif args.label == 'y':
            print (', '.join(finalLabelCode))

Ejemplo n.º 14

0

Mostrar archivo

Archivo: train_test_split.py Proyecto: snerligit/BME230A

def main():
    """Randomly split the PDB ids of a FASTA file into train and test lists.

    The test list receives at most ``int(args.percent * number_of_ids)``
    entries. Each id goes to the test list when that quota is not yet
    reached and a uniform random draw is below 0.5; otherwise it goes to the
    train list. The quota is therefore an upper bound, not a guarantee.
    Results are written to ``train/90_10/train.txt`` and
    ``test/90_10/test.txt``.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides = totalNineMers(fasta)
    # Materialise the keys: on Python 3 `keys()` is a view, which the former
    # `pdbids[i]` indexing cannot subscript (TypeError)
    pdbids = list(peptides.keys())
    testsetlen = int(args.percent * len(pdbids))

    trainset = []
    testset = []
    for pdbid in pdbids:
        # One draw per id, whether or not the quota is already reached
        r = random()
        if len(testset) < testsetlen and r < 0.5:
            testset.append(pdbid)
        else:
            trainset.append(pdbid)

    write_to_file('train/90_10/train.txt', trainset)
    write_to_file('test/90_10/test.txt', testset)

Ejemplo n.º 15

0

Mostrar archivo

def main():
    """Randomly split PDB ids into train and test lists, testing on A0201 only.

    The test list receives at most ``int(args.percent * number_of_ids)``
    entries. An id goes to the test list when that quota is not yet reached,
    a uniform random draw is below 0.5, and its allele equals ``'A0201'``;
    every other id goes to the train list. Results are written to
    ``train.txt`` and ``test.txt``.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    peptides, alleles = totalNineMers(fasta)
    pdbids = peptides.keys()
    quota = int(args.percent * len(pdbids))

    trainset, testset = [], []
    for pdbid in pdbids:
        draw = random()
        # Short-circuits left to right: the allele is only looked up when
        # the quota and the draw both allow a test pick
        picked = len(testset) < quota and draw < 0.5 and alleles[pdbid] == 'A0201'
        bucket = testset if picked else trainset
        bucket.append(pdbid)

    write_to_file('train.txt', trainset)
    write_to_file('test.txt', testset)

Ejemplo n.º 16

0

Mostrar archivo

 def __init__(self, path, num_parts=None, part_size=None, base_dir=None):
     """Describe how the file at `path` is divided into numbered parts.

     :param path: The file to split.
     :param num_parts: Number of parts. When `part_size` is also given, the
                       value computed from `part_size` takes precedence.
     :param part_size: Target size of one part, as a string understood by
                       ``humanfriendly.parse_size``.
     :param base_dir: Directory holding the parts; ``path + '.parts/'`` when
                      None.

     Part files are named ``<base_dir>/NNN/part.fasta`` with NNN starting at
     001, while the ``num`` attribute given to each part starts at 0.
     """
     # Basic #
     self.path = path
     # Directory #
     self.base_dir = (path + '.parts/') if base_dir is None else base_dir
     # Num parts #
     if num_parts is not None:
         self.num_parts = num_parts
     # Evaluate size #
     if part_size is not None:
         self.bytes_target = humanfriendly.parse_size(part_size)
         ratio = self.count_bytes / self.bytes_target
         self.num_parts = int(math.ceil(ratio))
     # Make parts, numbering each one as it is created #
     self.make_name = lambda i: self.base_dir + "%03d/part.fasta" % i
     parts = []
     for index in range(self.num_parts):
         part = FASTA(self.make_name(index + 1))
         part.num = index
         parts.append(part)
     self.parts = parts

Ejemplo n.º 17

0

Mostrar archivo

Archivo: extract_from_structures.py Proyecto: snerligit/BME230A

def main():
    """Print encoded inputs or labels for every labelled pair of PDB ids.

    A label key has the form ``<pdbid1>_<pdbid2>``. A pair is processed when
    at least one of its two ids is listed in the file ``args.t``. The text
    that gets encoded is ``left|right``, where each side is the peptide
    alone (``args.pep`` set) or the universal groove followed by the peptide.
    ``args.label`` selects what is printed: ``'x'`` for the encoded
    sequence, ``'y'`` for the encoded label; any other value prints nothing.
    """
    args = parse_args()

    fasta = FASTA(args.fasta)
    fasta.read()
    (peptides, mhcSeq, mhcAllele) = totalNineMers(fasta)
    universalGrooves = universalGroove(args.grooves, mhcSeq, peptides)
    intersectGrooves = IntersectionGroove(args.grooves, mhcSeq, peptides)

    labels = read_rmsd_file(args.rms)
    pdbids = read_datafile(args.t)

    for pair in labels:
        (pdbid1, pdbid2) = pair.split('_')
        # One selected member is enough for the pair to be kept
        if pdbid1 not in pdbids and pdbid2 not in pdbids:
            continue

        if args.pep:
            text = peptides[pdbid1] + '|' + peptides[pdbid2]
        else:
            text = (universalGrooves[pdbid1] + peptides[pdbid1] + '|' +
                    universalGrooves[pdbid2] + peptides[pdbid2])
        finalSeqCode, finalLabelCode = oneHotEncoding(text, labels[pair])

        if args.label == 'x':
            print(', '.join(finalSeqCode))
        elif args.label == 'y':
            print(', '.join(finalLabelCode))

Ejemplo n.º 18

0

Mostrar archivo

Archivo: nt.py Proyecto: xapple/seqsearch

 def test(self):
     """Search one sequence, and see if it works.

     A single-sequence FASTA file is written to a new temporary directory and
     searched against ``self.blast_db``; the directory is printed at the end.
     """
     # Scratch space #
     workdir = new_temp_dir()
     # A randomly chosen sequence (Homo sapiens mRNA for prepro cortistatin) #
     seq = """ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCC
     CCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGC
     CTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGG
     AAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCC
     CTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAG
     TTTAATTACAGACCTGAA"""
     # Drop the line breaks and the indentation captured by the literal #
     seq = seq.replace('\n','').replace(' ','')
     # Write the query file #
     input_fasta = FASTA(workdir + 'input.fasta')
     input_fasta.create()
     input_fasta.add_str(seq, "My test sequence")
     input_fasta.close()
     # Where the result goes, and the extra command line parameters #
     destination = workdir + 'output.blast'
     extra_params = {
         '-outfmt': 0,
         '-evalue': 1e-5,
         '-perc_identity': 99,
     }
     # Build the search, then run it #
     search = SeqSearch(input_fasta,
                        self.blast_db,
                        'nucl',
                        'blast',
                        num_threads = 1,
                        out_path    = destination,
                        params      = extra_params)
     search.run()
     # Report #
     print("Success", workdir)

Ejemplo n.º 19

0

Mostrar archivo

 def seeds(self):
     """The ``FASTA`` object wrapping ``self.autopaths.seed``."""
     return FASTA(self.autopaths.seed)

Ejemplo n.º 20

0

Mostrar archivo

 def fasta(self):
     """The ``FASTA`` object wrapping ``self.autopaths.fasta``."""
     return FASTA(self.autopaths.fasta)

Ejemplo n.º 21

0

Mostrar archivo

 def subsampled(self):
     """The subsampled version of ``self.fasta``, built on first use.

     When the file is missing, ``self.fasta`` is subsampled with
     ``down_to=30`` into it and ``self.add_taxonomy`` is then applied to the
     result. An existing file is returned untouched.
     """
     target = FASTA(self.p.subsampled)
     # Reuse the file when it is already on disk #
     if target.exists:
         return target
     self.fasta.subsample(down_to=30, new_path=target)
     self.add_taxonomy(target)
     return target

Ejemplo n.º 22

0

Mostrar archivo

    'TTDB/TriTrypDB-46_TcruziCLBrenerEsmeraldo-like_AnnotatedCDSs.fasta',
    'organism': 'TcruziCLBrenerEsmeraldo-like'
}
# Input description for the "Non-Esmeraldo-like" organism: the two FASTA files
# to load (paths as written here, under TTDB/) and the organism label that the
# script interpolates into the name of its output folder.
non_emeraldo = {
    'genome_filename':
    'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_Genome.fasta',
    'regions_filename':
    'TTDB/TriTrypDB-46_TcruziCLBrenerNon-Esmeraldo-like_AnnotatedCDSs.fasta',
    'organism': 'TcruziCLBrenerNon-Esmeraldo-like'
}

# The description the script runs on; rebind this name to `non_emeraldo` to
# process the other organism instead.
organism = emeraldo_like

if __name__ == "__main__":
    # Load FASTA files
    genome = FASTA(organism['genome_filename'])
    genome.load()

    regions = FASTA(organism['regions_filename'])
    regions.load()

    # Load database file
    sqlite = sqlite3.connect(SQLite_DB)

    # Create MFASeq Folder
    Organism_MFASeq_folder = f"{MFASeq_folder}/MFA-Seq_{organism['organism']}"
    if not os.path.isdir(Organism_MFASeq_folder):
        os.mkdir(Organism_MFASeq_folder)

    # Create MFASeq Files
    for chromosome_id in genome.data.keys():

Ejemplo n.º 23

0

Mostrar archivo

Archivo: dgf_stats.py Proyecto: seijihariki/redymo-tcruzi-analysis

                    "--basepairs",
                    action='store_true',
                    help="Use base pairs instead of genome counts")

# Read the command line, then resolve every option to its effective value.
# NOTE: `x or default` also replaces falsy values, so an explicit 0 or an empty
# string on the command line falls back to the default as well.
args = parser.parse_args()

# Falls back to an absolute path on the original author's machine
protein_fasta = args.fasta or '/home/seijihariki/Documents/TCC/TTDB/TriTrypDB-46_TcruziCLBrenerEsmeraldo-like_AnnotatedTranscripts.fasta'
# Used below as a raw prefix in front of "simulation_<n>/"
simulation_folder = args.simulation
search = args.search or 'DGF-1'
base_pairs = args.basepairs

simulation_cnt = args.count or 50
chromosomes_cnt = args.chromosomes or 41

print('Loading annotations:')
transcripts = FASTA(protein_fasta)
transcripts.load()

# Filled below with one list per chromosome name
collisions = {}

print('Detecting collisions:')
for chromosome in range(chromosomes_cnt):
    chromosome_name = f"TcChr{chromosome + 1}-S"
    collisions[chromosome_name] = []

    for simulation in range(simulation_cnt):
        with open(
                f"{simulation_folder}simulation_{simulation}/{chromosome_name}.cseq"
        ) as times:
            start, end = -2, -2
            current_location = 0

Ejemplo n.º 24

0

Mostrar archivo

"""

# Built-in modules #
import inspect, os

# Internal modules #
from seqsearch.databases.ncbi_16s import ncbi_16s
from seqsearch.search.blast import BLASTquery

# First party modules #
from fasta import FASTA

# Get current directory #
# Get current directory #
# The file name comes from the current frame; `this_dir` is the absolute
# directory of this script and always ends with a slash.
file_name = inspect.getframeinfo(inspect.currentframe()).filename
this_dir = os.path.dirname(os.path.abspath(file_name)) + '/'

###############################################################################
# Only runs when the file is executed as a script, not when it is imported.
if __name__ == "__main__":

    # Main input #
    # 'seqs.fasta' is looked up in the same directory as this script
    seqs = FASTA(this_dir + 'seqs.fasta')

    # The database to search against #
    db = ncbi_16s.blast_db

    # Create search #
    query = BLASTquery(seqs, db)

    # Run #
    query.run()

Ejemplo n.º 25

0

Mostrar archivo

"""We explore the client given inputs, check for problems,
then format them and store them in the repository as immutable artifacts
(compressed text files)"""

import inspect, os, glob, pandas
from fasta import FASTA

# Locate this script on disk; the repository data paths are relative to it
current_script = inspect.getframeinfo(inspect.currentframe()).filename
current_dir = os.path.dirname(os.path.abspath(current_script)) + '/'
genomes_dir = current_dir + '../ld12/data/genomes/'

# Client-provided inputs: one '.faa' and one '.fna' file per genome.
# Any path containing '647533246' is explicitly left out.
input_dir = "/proj/b2013274/mcl/"
faa_paths = sorted(glob.glob(input_dir + '*.faa'))
fna_paths = sorted(glob.glob(input_dir + '*.fna'))
faas = [FASTA(faa) for faa in faa_paths if '647533246' not in faa]
fnas = [FASTA(fna) for fna in fna_paths if '647533246' not in fna]

# Integer identifier of each file, taken from its `short_prefix`
faas_nums = [int(g.short_prefix) for g in faas]
fnas_nums = [int(g.short_prefix) for g in fnas]
# Metadata table; its index (first column) is compared with those identifiers
metadata = pandas.io.parsers.read_csv(current_dir +
                                      '../ld12/data/metadata.tsv',
                                      sep='\t',
                                      index_col=0,
                                      encoding='utf-8')
meta_nums = list(metadata.index)

# Consistency report: identifiers present on one side only. An empty set means
# the two sides agree. Written as single-argument calls so that the lines are
# valid, and print the same thing, on both Python 2 and Python 3.
print(set(faas_nums) ^ set(fnas_nums))
print(set(faas_nums) ^ set(meta_nums))


def strip(seq):

Ejemplo n.º 26

0

Mostrar archivo

from fasta import FASTA, AlignedFASTA
# Wrap two files from the current working directory: one with the plain
# FASTA class, one with the AlignedFASTA class imported alongside it.
community = FASTA('community.fasta')
alignment = AlignedFASTA('alignment.fasta')

Ejemplo n.º 27

0

Mostrar archivo

 def all_proteins(self):
     """The main fasta file, wrapping ``self.p.unzipped_proteins``."""
     proteins = FASTA(self.p.unzipped_proteins)
     return proteins

Ejemplo n.º 28

0

Mostrar archivo

 def to_fasta(self, path):
     """Write every record of this object to `path` in FASTA format.

     :param path: Destination file, opened for writing (truncated).
     :returns: A ``FASTA`` object built from `path`.
     """
     with open(path, 'w') as out_file:
         # Records are written one at a time, in iteration order #
         for record in self:
             SeqIO.write(record, out_file, 'fasta')
     return FASTA(path)