Python SeqIO.convertの例、Bio.SeqIO.convert Pythonの例

コード例 #1

0

ファイルを表示

ファイル: miseq_mc_preprocess.py プロジェクト: heyyyjude/chunlab

    def run_random_select(self):
        """Convert the merged FASTQ to FASTA and run the Java random sampler on it.

        The work happens inside ``self.hulk_sample_dir_path`` because the Java
        ``SeqRandomSampling`` tool does not seem to take absolute paths for its
        input files. The previous working directory is always restored, even
        when a step fails.

        If the merged FASTQ is missing, a message is printed and nothing else
        is run.
        """
        merged_fq = '{}{}'.format(
            self.sample_info.sample_name,
            '.merged.primer_trim.len_trim.fastq'
        )
        merged_fa = os.path.splitext(merged_fq)[0] + '.fasta'

        cur_dir = os.getcwd()
        os.chdir(self.hulk_sample_dir_path)
        try:
            # Check for the input before converting: SeqIO.convert would fail
            # on a missing file, so a check placed after it can never report.
            if not os.path.exists(merged_fq):
                print('{} is not found in {}'.format(
                    merged_fq,
                    self.hulk_sample_dir_path,
                ))
                return

            SeqIO.convert(merged_fq, 'fastq', merged_fa, 'fasta')

            jar_cmd = ['java SeqRandomSampling',
                       '-c ',
                       self.random_num,
                       '-i',
                       merged_fa,
                       ]
            final_jar_cmd = ' '.join(str(x) for x in jar_cmd)
            os.system(final_jar_cmd)
        finally:
            # Never leave the process in the sample directory.
            os.chdir(cur_dir)

コード例 #2

0

ファイルを表示

ファイル: HW1-fastq2fasta.py プロジェクト: asherkhb/cousework

def main(argv):
    """Convert the FASTQ file named by ``-i`` into a FASTA file beside it.

    Args:
        argv: command-line arguments without the program name. ``-h`` prints
            usage and exits; ``-i <inputfile>`` selects the FASTQ input.

    The output path is the input path with its extension replaced by
    ``.fasta``. On a syntax error (unknown option or missing ``-i``) a message
    is printed, ``fastq2fasta_error-log.txt`` is written and the process exits
    with status 2.
    """
    def _syntax_error():
        # Shared failure path for bad options and a missing input file.
        print("Incorrect syntax: Use '-h' for help.")
        with open('fastq2fasta_error-log.txt', 'w') as error:
            error.write("Syntax Error\n")
        sys.exit(2)

    inputfile = ''
    outputfile = ''

    try:
        opts, args = getopt.getopt(argv, 'hi:')
    except getopt.GetoptError:
        _syntax_error()

    for opt, arg in opts:
        if opt == '-h':
            print('Use Syntax: fastq2fasta.py -i <inputfile>')
            sys.exit()
        elif opt == '-i':
            inputfile = arg
            filename, extension = os.path.splitext(arg)
            outputfile = '%s.fasta' % (filename)

    if inputfile == '':
        _syntax_error()

    # SeqIO.convert opens and closes both files itself when given paths.
    # Holding a second write handle on the output and writing the file name
    # into it would overwrite the start of the converted FASTA.
    SeqIO.convert(inputfile, "fastq", outputfile, "fasta")
    print('Your FASTQ has been converted. See %s' % (outputfile))

コード例 #3

0

ファイルを表示

ファイル: refpkg_align.py プロジェクト: boegel/pplacer

def align(arguments):
    """
    Align ``arguments.seqfile`` against the alignment of a reference package.

    The aligner is chosen from ``ALIGNERS`` using ``arguments.profile_version``
    or, when that is unset, the method guessed by the reference package. The
    aligner writes to a temporary file created next to ``arguments.outfile``;
    that file is then either renamed to the output path or converted to
    ``arguments.output_format``.

    Returns whatever the selected alignment function returns.
    """
    package = arguments.refpkg
    profile = arguments.profile_version or package.guess_align_method()
    run_aligner = ALIGNERS[profile]
    options = arguments.alignment_options or ALIGNMENT_DEFAULTS.get(profile)

    out_dir = os.path.dirname(arguments.outfile)
    with _temp_file(prefix='.refpkg_align', dir=out_dir) as scratch:
        # Only the name is needed; the aligner writes the file itself.
        scratch.close()
        result = run_aligner(
            package, arguments.seqfile, scratch.name,
            use_mask=arguments.use_mask,
            use_mpi=arguments.use_mpi,
            mpi_args=arguments.mpi_arguments,
            mpi_program=arguments.mpi_run,
            alignment_options=options,
            stdout=arguments.stdout)

        wanted_format = arguments.output_format
        if wanted_format and wanted_format != DEFAULT_FORMAT[profile]:
            # The aligner's native format differs from the requested one.
            SeqIO.convert(scratch.name, DEFAULT_FORMAT[profile],
                          arguments.outfile, wanted_format)
        else:
            # No format conversion needed.
            os.rename(scratch.name, arguments.outfile)

    return result

コード例 #4

0

ファイルを表示

ファイル: dbprep.py プロジェクト: dcasbioinfo/sifter-t

def stockfastaconvert(options):
    '''
    Conversion of multiple alignment - Stockholm to Multifasta
    '''
    while True:
        try:
            fam = q.get(block=True, timeout=0.1)
        except Empty:
            if n.qsize() > 0:
                for _ in range(n.qsize()):
                    print n.get(),
                print ""
            break
        else:
            n.put(fam)
            if n.qsize() >= 10:
                for _ in range(10):
                    print n.get(),
                print ""
            if os.path.exists(options.dbdir+"align/stockholm/"+fam+".stockholm"):
                if os.path.exists(options.dbdir+"align/"+fam+".fasta"):
                    os.remove(options.dbdir+"align/"+fam+".fasta", "fasta")
                SeqIO.convert(options.dbdir+"align/stockholm/"+fam+".stockholm",
                    "stockholm", options.dbdir+"align/"+fam+".fasta", "fasta")
                handle = open(options.dbdir+"align/"+fam.upper()+".fasta", "r")
                temp = set()
                for nuc_rec in SeqIO.parse(handle, "fasta"):
                    temp.add(nuc_rec.id)
                handle.close()
                handle = open(options.dbdir+"align/gene_list/"+fam.upper()+".gene", "w")
                for gene in temp:
                    handle.write(gene+"\n")
                handle.close()
                os.remove(options.dbdir+"align/stockholm/"+fam+".stockholm")
            q.task_done()

コード例 #5

0

ファイルを表示

ファイル: test_SeqIO_QualityIO.py プロジェクト: dzhang4/biopython

    def simple_check(self, base_name, in_variant):
        """Check conversion of one FASTQ file into every FASTQ variant.

        For each output variant the reference file
        ``Quality/<base_name>_as_<variant>.fastq`` is loaded and compared with
        the output of ``SeqIO.convert`` and of ``SeqIO.parse`` + ``SeqIO.write``.
        """
        source_path = "Quality/%s_original_%s.fastq" % (base_name, in_variant)
        source_format = "fastq-" + in_variant

        for out_variant in ("sanger", "solexa", "illumina"):
            self.assertTrue(os.path.isfile(source_path))

            # Load the reference output...
            reference_path = "Quality/%s_as_%s.fastq" % (base_name, out_variant)
            with open(reference_path, _universal_read_mode) as reference:
                expected = reference.read()

            target_format = "fastq-" + out_variant
            with warnings.catch_warnings():
                if out_variant != "sanger":
                    # Ignore data loss warnings from max qualities
                    for category in (BiopythonWarning, UserWarning):
                        warnings.simplefilter("ignore", category)

                # Check matches using convert...
                converted = StringIO()
                SeqIO.convert(source_path, source_format,
                              converted, target_format)
                self.assertEqual(expected, converted.getvalue())

                # Check matches using parse/write
                rewritten = StringIO()
                SeqIO.write(SeqIO.parse(source_path, source_format),
                            rewritten, target_format)
                self.assertEqual(expected, rewritten.getvalue())

コード例 #6

0

ファイルを表示

ファイル: BioPythonUtils.py プロジェクト: bosborne/BioPythonUtils

    def run(self, edit):
        """Convert each selected GenBank record to FASTA in a new file view.

        Processing stops with an error message at the first selection that is
        empty or does not start with ``LOCUS``.
        """
        for region in self.view.sel():
            selected = self.view.substr(region).strip()

            if not selected:
                sublime.error_message("No selected text")
                return

            # Only the start is checked: SeqIO can handle Genbank format that
            # does not end in '//' so there is no need to check for that.
            if not selected.startswith('LOCUS'):
                sublime.error_message(
                    "Selected text does not look like Genbank: no 'LOCUS'")
                return

            # Read from a string and write to a string
            fasta_buffer = io.StringIO()
            with io.StringIO(selected) as genbank_buffer:
                SeqIO.convert(genbank_buffer, 'genbank', fasta_buffer, 'fasta')

            # Write the fasta string to a new window at position 0
            target_view = self.view.window().new_file()
            target_view.insert(edit, 0, fasta_buffer.getvalue())

コード例 #7

0

ファイルを表示

ファイル: blastselftargets.py プロジェクト: aays/phageParser

def main(gbdir, outdir):
    """BLAST each organism's spacers against its own genome.

    Args:
        gbdir: directory holding (or receiving) ``<accession>.gb`` files.
        outdir: directory that receives one ``<accession>.json`` BLAST result
            per organism.

    Two scratch FASTA files are created in the current working directory and
    removed again when the function finishes, whether or not it succeeds.
    """
    os.makedirs(gbdir, exist_ok=True)
    os.makedirs(outdir, exist_ok=True)
    tempq = 'tempquery.fasta'
    tempdb = 'tempdb.fasta'
    try:
        for org in tqdm(Organism.objects.all()):
            # get genbank and convert to fasta
            fpath = os.path.join(gbdir, '{}.gb'.format(org.accession))
            if not os.path.isfile(fpath):
                print('\nFetching {} with accession {}'.format(
                    org.name,
                    org.accession
                ))
                fetch(fpath)
            SeqIO.convert(fpath, 'genbank', tempdb, 'fasta')
            # get spacers of organism and convert to fasta
            spacers = Spacer.objects.filter(loci__organism=org)
            fastatext = ''.join('>{}\n{}\n'.format(spacer.id, spacer.sequence)
                                for spacer in spacers)
            with open(tempq, 'w') as f:
                f.write(fastatext)
            # run blast and save output
            outpath = os.path.join(outdir, '{}.json'.format(org.accession))
            commandargs = ['blastn', '-query', tempq,
                           '-subject', tempdb, '-out', outpath, '-outfmt', '15']
            subprocess.run(commandargs, stdout=subprocess.DEVNULL)
    finally:
        # The scratch files only exist once at least one organism has been
        # processed, so removing them unconditionally would fail on an
        # empty queryset.
        for scratch in (tempq, tempdb):
            if os.path.exists(scratch):
                os.remove(scratch)

コード例 #8

0

ファイルを表示

ファイル: geo_shift.py プロジェクト: matteoferla/Geobacillus

def roundtrip_check(filepath):
    """Report how much a GenBank file changes in size after a SeqIO round trip.

    Is there any odd data in the genbank that will get lost? Bar for the known
    things addressed with Botch().

    The file is converted GenBank -> GenBank into ``test.gbk`` in the current
    directory, both sizes are printed, and ``test.gbk`` is removed again.
    """
    import os
    try:
        SeqIO.convert(filepath, "genbank", "test.gbk", "genbank")
        # Compare against the file that was actually converted rather than a
        # fixed file name.
        print("went from " + str(os.stat(filepath).st_size) + " to " + str(os.stat("test.gbk").st_size))
    finally:
        if os.path.exists("test.gbk"):
            os.remove("test.gbk")

コード例 #9

0

ファイルを表示

ファイル: generateData.py プロジェクト: stevenhwu/SrpPython

def runSeqGen(workingFile, srp_hap_file, srp_tree_file, debug):
    """Run seq-gen until every generated sequence is unique, then export FASTA.

    Args:
        workingFile: path prefix for the seq-gen input/output files.
        srp_hap_file: destination that receives a copy of the final FASTA.
        srp_tree_file: passed to ``runBSSC`` when the tree is regenerated.
        debug: passed to ``runBSSC``.

    seq-gen is re-run until ``check_unique_sequences`` accepts its output.
    After 100 attempts ``runBSSC`` is called and the attempt counter restarts.
    """
    seqgen_infile = workingFile + "_seqgen.phylip"
    seqgen = runExtProg(seqgenDir + "./seq-gen", pdir=seqgenDir, length=3)
    seqgen.set_param_at("-mHKY", 1)
    seqgen.set_param_at("-t2", 2)
    seqgen.set_param_at("-k1", 3)
#     seqgen.set_param_at("-d0.1", 4)
    # seqgen.set_param_at("-s0.00001", 4)
    seqgen.set_stdin(seqgen_infile)

    all_unique = False
    repeat = 0
    while not all_unique:
        if repeat == 100:
            runBSSC(workingFile, srp_tree_file, debug)
            print("==========rerun BSSC========")
            repeat = 0
        repeat += 1
        seqgen.run(0)
        all_unique = check_unique_sequences(seqgen)

    phylip_out = workingFile + "_seqgen_out.phylip"
    # The context manager guarantees the data is flushed and the handle closed
    # before SeqIO reads the file back.
    with open(phylip_out, "w") as temp_handle:
        temp_handle.write(seqgen.output)

    SeqIO.convert(phylip_out, "phylip", workingFile + ".fasta", "fasta")
    shutil.copy(workingFile + ".fasta", srp_hap_file)

コード例 #10

0

ファイルを表示

ファイル: ciriusv1-Linux.py プロジェクト: sfbailey/Cirius-Python

def convertQ2A(qd, qname):
    """Convert ``<qd>/<qname>.fastq`` to FASTA and return the FASTA path."""
    stem = qd + "/" + qname
    fastq_path = stem + ".fastq"
    fasta_path = stem + ".fasta"

    SeqIO.convert(fastq_path, "fastq", fasta_path, "fasta")
    print("converted file to fasta")

    return fasta_path

コード例 #11

0

ファイルを表示

ファイル: illumina2SangerFq.py プロジェクト: B-Rich/gsinghal_python_src

def illumina2sangerFq(inputfile):
    """Convert an Illumina-encoded FASTQ file to Sanger-encoded FASTQ.

    Args:
        inputfile: path to the ``fastq-illumina`` input.

    Returns:
        The path of the written file: ``inputfile`` with its last three
        characters replaced by ``.fastq``.
    """
    # The output name is built by dropping the last three characters, which
    # matches a two-letter extension such as ".fq". The slicing is kept as-is
    # because callers can only locate the result by the same rule.
    filename = inputfile[:-3]+'.fastq'

    SeqIO.convert(inputfile, "fastq-illumina", filename, "fastq")
    return filename

コード例 #12

0

ファイルを表示

ファイル: convertingLociNGS.py プロジェクト: BioinformaticsArchive/lociNGS

def toNexus (listOfFiles):
    """Convert each FASTA file to NEXUS in the current working directory.

    The output name is the input name with ``.fasta`` replaced by ``.nex`` and
    the leading directory part matched by ``.+/.+/`` replaced by the current
    working directory. Returns the current working directory.
    """
    for fasta_path in listOfFiles:
        nexus_name = fasta_path.replace(".fasta", ".nex")
        nexus_path = re.sub(".+/.+/", os.getcwd()+"/", nexus_name)
        SeqIO.convert(fasta_path, "fasta", nexus_path, "nexus", generic_dna)
    return os.getcwd()

コード例 #13

0

ファイルを表示

ファイル: test_SeqIO_convert.py プロジェクト: addessk/biopython

def check_convert_fails(in_filename, in_format, out_format, alphabet=None):
    """Check that parse/write and convert fail with the same ValueError.

    Args:
        in_filename: path of the input file.
        in_format: SeqIO format name of the input.
        out_format: SeqIO format name to write.
        alphabet: optional alphabet passed through to SeqIO.

    Raises:
        AssertionError: if either route succeeds, or if the two error
            messages differ.
    """
    qual_truncate = truncation_expected(out_format)
    # We want the SAME error message from parse/write as convert!
    err1 = None
    try:
        records = list(SeqIO.parse(in_filename, in_format, alphabet))
        handle = StringIO()
        # catch_warnings restores the filter list even when the write raises.
        # simplefilter() inserts at the front of the list, so filters.pop()
        # (which drops the last entry) would remove an unrelated filter.
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.write(records, handle, out_format)
        raise AssertionError("Parse or write should have failed!")
    except ValueError as err:
        err1 = err
    # Now do the conversion...
    try:
        handle2 = StringIO()
        with warnings.catch_warnings():
            if qual_truncate:
                warnings.simplefilter('ignore', UserWarning)
            SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
        raise AssertionError("Convert should have failed!")
    except ValueError as err2:
        assert str(err1) == str(err2), \
               "Different failures, parse/write:\n%s\nconvert:\n%s" \
               % (err1, err2)

コード例 #14

0

ファイルを表示

ファイル: Genbank_slicer.py プロジェクト: jrjhealey/bioinfo-tools

def convert(basename, genbank):
    '''Write the given genbank file as FASTA (for BLAST) and return its path.'''
    name_template = "{}.fasta.tmp"
    fasta_path = name_template.format(basename)

    SeqIO.convert(genbank, 'genbank', fasta_path, 'fasta')
    return fasta_path

コード例 #15

0

ファイルを表示

ファイル: align_reads.py プロジェクト: pombreda/nextgen_pipeline

def copy_sequence(input_file, output_file):
    '''Copy a sequence file from the staging area into a gzipped FASTQ.

    The input may be plain, gzip- or bzip2-compressed (detected from its first
    two bytes). It is first converted from Illumina to Sanger qualities; if
    that fails with "Invalid character in quality string" the input is copied
    through unchanged instead. Any other ValueError is re-raised.
    '''
    import shutil

    GZIP_HEADER = b'\x1f\x8b'
    BZIP_HEADER = b'BZ'

    pmsg('Copying sequence files', input_file, output_file)
    # check if this is actually a gzipped file; the magic bytes must be read
    # in binary mode, and the probing handle is closed straight away
    with open(input_file, 'rb') as probe:
        header = probe.read(2)
    if header == GZIP_HEADER:
        input_file_handle = gzip.open(input_file, 'rb')
    elif header == BZIP_HEADER:
        input_file_handle = BZ2File(input_file, 'r')
    else:
        input_file_handle = open(input_file, 'rb')
    output_file_handle = gzip.open(output_file, 'wb')

    # check whether this is a illumina or sanger fastq file
    try:
        SeqIO.convert(input_file_handle, 'fastq-illumina', output_file_handle, 'fastq-sanger')
    except ValueError as e:
        # check if this is a quality score problem
        if e.args != ('Invalid character in quality string',):
            raise
        input_file_handle.seek(0)
        # A gzip file opened for writing cannot seek backwards, so restart
        # the output by reopening it (which truncates any partial data).
        output_file_handle.close()
        output_file_handle = gzip.open(output_file, 'wb')
        # Stream the copy instead of loading every line into memory.
        shutil.copyfileobj(input_file_handle, output_file_handle)
    finally:
        input_file_handle.close()
        output_file_handle.close()

コード例 #16

0

ファイルを表示

ファイル: convert.py プロジェクト: csambles/solidutils

def main():
    """Convert a FASTQ file to FASTA and, optionally, to a QUAL file.

    Command-line options (read from ``sys.argv``):
        -i / --input   FASTQ file to read (required)
        -o / --output  FASTA file to write (required)
        -q / --qual    QUAL file to write (optional)

    Prints a message and returns without converting when a required option is
    missing.
    """
    parser = OptionParser()
    parser.add_option("-i", "--input", dest="input",
                      help="read INPUT fastq file", metavar="INPUT")

    parser.add_option("-o", "--output", dest="output",
                      help="write OUTPUT fasta file", metavar="OUTPUT")

    parser.add_option("-q", "--qual", dest="qual",
                      help="write OUTPUT qual file", metavar="QUAL")

    (opt, args) = parser.parse_args()

    if opt.input is None:
        print("Missing input file")
        return

    if opt.output is None:
        print("Missing output file")
        return

    print("Converting files...")

    print("Creating csfasta file")
    count = SeqIO.convert(opt.input, "fastq", opt.output, "fasta")

    print("Converted %i records" % count)

    if opt.qual is not None:
        print("Creating Qual file")
        count = SeqIO.convert(opt.input, "fastq", opt.qual, "qual")

        print("Converted %i qual records" % count)

コード例 #17

0

ファイルを表示

ファイル: efm-calc.py プロジェクト: stationarysalesman/efm-calculator

def process_file(filepath, organism):
    """Process a single file given by the user on the command line.

    The sequence format is taken from the lower-cased file extension (``gb``
    is treated as ``genbank``). Non-FASTA input is additionally written as
    FASTA to ``/tmp/<name>.fasta``, where ``<name>`` is the part of the file
    name before its first dot.

    Returns the value returned by ``process_efm_cli``.
    """
    fasta_filepath = filepath

    # Determine file type
    fname = filepath.split('/')[-1].lower()
    fnamesplit = fname.split('.')
    ftype = fnamesplit[-1]
    if ftype == 'gb':
        ftype = 'genbank'
    check_features = (ftype == 'genbank')

    # Open the file and get metadata
    obj_file = SeqIO.read(filepath, ftype)
    features = get_genbank_features(obj_file)
    my_seq = str(obj_file.seq)

    # Create FASTA file if necessary
    if ftype != 'fasta':
        fasta_filepath = "/tmp/" + fnamesplit[0] + ".fasta"
        with open(fasta_filepath, 'w') as handle:
            # Convert from the format the file was just read with, not from
            # a fixed "genbank", so other non-FASTA inputs also convert.
            SeqIO.convert(filepath, ftype, handle, "fasta")

    # Process the file
    output_dict = process_efm_cli(fasta_filepath, features, my_seq, organism, check_features, fname)
    return output_dict

コード例 #18

0

ファイルを表示

ファイル: NGS.py プロジェクト: mezarino/PyPhyloGenomics

def prepare_data(ionfile, index_length):
    '''
    * Changes quality format from Phred to Solexa (which is required by the fastx-toolkit).
    * Changes sequences id to incremental numbers.
    * Creates temporal FASTA file with the indexes removed from the sequences.

    Files generated will be written to folder ``data/modified/``

    * ``ionfile`` argument is FASTQ format file as produced by IonTorrent
    * ``index_length`` number of base pairs of your indexes. This is necessary
      to trim the indexes before blasting the FASTA file against the
      reference gene sequences.

    Raises ``subprocess.CalledProcessError`` if one of the fastx-toolkit
    commands exits with a non-zero status.

    Example:

    >>> from pyphylogenomics import NGS
    >>> ionfile = "ionrun.fastq";
    >>> index_length = 8;
    >>> NGS.prepare_data(ionfile, index_length);
    Your file has been saved using Solexa quality format as data/modified/wrk_ionfile.fastq
    Your sequence IDs have been changed to numbers.
    The FASTA format file data/modified/wrk_ionfile.fasta has been created.
    '''
    # create folder to keep data
    folder = os.path.join("data", "modified")
    if not os.path.exists(folder):
        os.makedirs(folder)

    # change quality format from Phred to Solexa (required by fastx-toolkit)
    # write file to work on
    wrkfile = os.path.join(folder, "wrk_ionfile.fastq")
    SeqIO.convert(ionfile, "fastq", wrkfile, "fastq-solexa")
    print("Your file has been saved using Solexa quality format as " + wrkfile)

    # change sequences id to incremental numbers
    # check_call raises on a non-zero exit status, so no separate status test
    # is needed; argument lists avoid going through a shell.
    subprocess.check_call(
        ["fastx_renamer", "-n", "COUNT", "-i", wrkfile, "-o", "tmp.fastq"])
    print("Your sequence IDs have been changed to numbers.")

    # replace working file with temporal file
    os.rename("tmp.fastq", wrkfile)

    # create temporal FASTA file
    subprocess.check_call(["fastq_to_fasta", "-i", wrkfile, "-o", "tmp.fasta"])

    # trim index region: fastx_trimmer's -f is given index_length + 1 so the
    # output starts just after the index
    first_base = int(index_length) + 1
    fasta_out = os.path.join(folder, "wrk_ionfile.fasta")
    subprocess.check_call(
        ["fastx_trimmer", "-f", str(first_base), "-i", "tmp.fasta",
         "-o", fasta_out])

    if os.path.isfile("tmp.fasta"):
        os.remove("tmp.fasta")

    print("The FASTA format file " + fasta_out + " has been created.")

コード例 #19

0

ファイルを表示

ファイル: seqio.py プロジェクト: charles-plessy/seq_crumbs

def seqio(in_fhands, out_fhands, out_format, copy_if_same_format=True):
    """Convert sequence files between formats.

    Args:
        in_fhands: list of input file handles; their formats are guessed.
        out_fhands: list of output file handles.
        out_format: SeqIO format name to write.
        copy_if_same_format: when the single input already has the requested
            format, copy it (True) or create a relative symlink (False).

    Supported combinations are one input to one output, and one input to two
    outputs (sequence + qual) when ``out_format`` is ``'fasta'``.

    Raises:
        MalformedFile: when SeqIO reports that sequence and quality disagree.
        ValueError: any other SeqIO conversion error.
    """
    in_formats = [guess_format(fhand) for fhand in in_fhands]

    if (len(in_formats) == 1 and in_formats[0] == out_format and
        hasattr(in_fhands[0], 'name')):
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)

    elif len(in_fhands) == 1 and len(out_fhands) == 1:
        try:
            SeqIO.convert(in_fhands[0], in_formats[0], out_fhands[0],
                          out_format)
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 1 and len(out_fhands) == 2 and
          out_format == 'fasta'):
        try:
            for seq in read_seqrecords([in_fhands[0]]):
                SeqIO.write([seq], out_fhands[0], out_format)
                SeqIO.write([seq], out_fhands[1], 'qual')
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    # NOTE(review): any other combination of inputs/outputs falls through and
    # does nothing - confirm whether that should be reported to the caller.

コード例 #20

0

ファイルを表示

ファイル: sanger_sync.py プロジェクト: VDBWRAIR/ngs_mapper

def sync_readdata( rawdir, ngsdata ):
    '''
    Ensures that ab1 files are symlinked from the RawData/Sanger/Run directory
    and that they are then converted to fastq

    Existing symlinks and existing fastq files are left untouched.

    @param rawdir - RawData/Sanger/Run path
    @param ngsdata - Path to root NGSData directory
    '''
    raw_reads = glob( join( rawdir, '*.ab1' ) )
    readd = join( ngsdata, 'ReadData', 'Sanger', basename(rawdir) )
    if not isdir( readd ):
        os.makedirs( readd )
    for read in raw_reads:
        lnk = relpath( read, readd )
        rdpath = join( readd, basename(read) )
        if not exists( rdpath ):
            logger.info( 'Symlinking {0} to {1}'.format(rdpath, lnk) )
            os.symlink( lnk, rdpath )
        else:
            logger.info( 'Skipping existing abi file {0}'.format(rdpath) )
        # Swap only the file extension; a plain str.replace would also
        # rewrite any '.ab1' that appears in a directory name.
        fqpath = os.path.splitext( rdpath )[0] + '.fastq'
        if not exists( fqpath ):
            logger.info( 'Converting {0} to fastq {1}'.format(rdpath,fqpath) )
            SeqIO.convert( rdpath, 'abi', fqpath, 'fastq' )
        else:
            logger.info( 'Skipping existing fastq file {0}'.format(fqpath) )

コード例 #21

0

ファイルを表示

ファイル: build.py プロジェクト: Tsingke/fammer

def aln2hmm(task):
    """Build an HMM profile from a Clustal alignment.

    The first dependency of ``task`` is converted to Stockholm format and
    handed to ``hmmbuild``, which writes ``task.target``.

    Cleans: .stk
    """
    clustal_aln = task.depends[0]
    stockholm_aln = ext(clustal_aln, 'stk')
    SeqIO.convert(str(clustal_aln), 'clustal', stockholm_aln, 'stockholm')
    command = 'hmmbuild %s %s' % (task.target, stockholm_aln)
    sh(command)

コード例 #22

0

ファイルを表示

ファイル: cmalign.py プロジェクト: fhcrc/deenurp

def cmalign(infile, outfile, cpu):
    """Align ``infile`` with cmalign and write the alignment to ``outfile`` as FASTA.

    Returns the scores returned by ``wrap.cmalign_files``.
    """
    with util.ntf(suffix='.sto') as stockholm_tmp:
        with open(outfile, 'w') as fasta_out:
            scores = wrap.cmalign_files(infile, stockholm_tmp.name, cpu=cpu)
            SeqIO.convert(stockholm_tmp, 'stockholm', fasta_out, 'fasta')
            fasta_out.flush()

    return scores

コード例 #23

0

ファイルを表示

ファイル: webplugin.py プロジェクト: UdeM-LBIT/polytomy-solver-web

def convert(in_file, in_format, out_format, treeid, seq_data_type):
    """Convert ``in_file`` to ``out_format``, delete the input and return the new path.

    The output is written to ``TMP_UTILS_PATH + treeid + "." + out_format``.
    """
    # SeqIO responds to clustal as identifier for the format but the official
    # extension is .aln
    # NOTE(review): the substitution is applied to the SEQUENCE_ALPHABET
    # lookup key, not to the output file extension - confirm this is intended.
    alphabet_key = ".aln" if seq_data_type == "clustal" else seq_data_type
    out_file = TMP_UTILS_PATH + treeid + "." + out_format
    SeqIO.convert(in_file, in_format, out_file, out_format,
                  alphabet=SEQUENCE_ALPHABET[alphabet_key])
    os.remove(in_file)
    return out_file

コード例 #24

0

ファイルを表示

ファイル: Utility.py プロジェクト: plpla/furry-bear

def formatFasta(inFile, outFile):
    """
    Rewrite a FASTA file through SeqIO to normalise its formatting.

    :param inFile: A fasta file that might contain format errors
    :param outFile: Path that receives the re-written fasta file
    :return: Nothing
    """
    fasta = "fasta"
    SeqIO.convert(inFile, fasta, outFile, fasta)

コード例 #25

0

ファイルを表示

ファイル: ReadSimulator.py プロジェクト: nyoungb2/seqDB_tools

    def parallel(self, names, fileType='fasta', nprocs=1, **kwargs):
        """Running simulator using parmap.map

        Args:
        names -- NameFile class with iter_names() method
        fileType -- sequence file format
        nprocs -- max number of parallel simulation calls
        kwargs -- passed to simulator

        Attribs added to each name instance in names:
        simReadsFile -- file name of simulated reads
        simReadsFileType -- file type (eg., 'fasta' or 'fastq')
        simReadsFileCount -- number of simulated reads

        Return:
        0 on success; 1 if any simulated read file is missing or empty
        """
        # making list of fasta file to provide simulator call
        fastaFiles = [name.get_fastaFile() for name in names.iter_names()]

        # setting kwargs
        new_simulator = partial(self, **kwargs)

        # calling simulator
        res = parmap.map(new_simulator, fastaFiles, processes=nprocs)

        # checking that simulated reads were created for all references; return 1 if no file
        for row in res:
            if row['simReadsFile'] is None or not os.path.isfile(row['simReadsFile']):
                return 1
            # st_size is the file size; index 0 of the stat result is the
            # mode, which is never 0 for an existing file.
            elif os.stat(row['simReadsFile']).st_size == 0:
                return 1

        # converting reads to fasta if needed
        if fileType.lower() == 'fasta':
            for result in res:
                simFile = result['simReadsFile']
                simFileType = result['simReadsFileType'].lower()
                if simFileType != 'fasta':
                    fastaFile = os.path.splitext(simFile)[0] + '.fna'
                    SeqIO.convert(simFile, simFileType, fastaFile, 'fasta')
                    result['simReadsFile'] = fastaFile
                    result['simReadsFileType'] = 'fasta'

        # setting attribs in name instances
        for i, name in enumerate(names.iter_names()):
            # read file
            simReadsFile = res[i]['simReadsFile']
            name.set_simReadsFile(simReadsFile)
            # file type
            readsFileType = res[i]['simReadsFileType'].lower()
            name.set_simReadsFileType(readsFileType)
            # number of simulated reads
            num_reads = sum(1 for _ in SeqIO.parse(simReadsFile, readsFileType))
            name.set_simReadsCount(num_reads)

        return 0

コード例 #26

0

ファイルを表示

ファイル: test_fasta_input.py プロジェクト: VDBWRAIR/ngs_mapper

 def setUp(self):
     """Create temp input dirs holding the FASTQ fixture and a FASTA copy of it."""
     fasta_in = tempfile.mkdtemp()
     fastq_in = tempfile.mkdtemp()
     self.fastaInputDir = fasta_in
     self.fastqInputDir = fastq_in
     # Output directories sit beside the temp input directories.
     self.fastaOutputDir = join(dirname(fasta_in), 'fastaout')
     self.fastqOutputDir = join(dirname(fastq_in), 'fastqout')

     fixture_fastq = here(inputFastq)
     shutil.copy(fixture_fastq, fastq_in)
     fasta_target = join(fasta_in, "R1.fasta")
     SeqIO.convert(fixture_fastq, 'fastq', fasta_target, 'fasta')

コード例 #27

0

ファイルを表示

ファイル: script.py プロジェクト: shamansim/Rosalind

def main(fichier):
    """Convert the FASTQ file ``fichier`` into FASTA, written to ``output.txt``."""
    from Bio import SeqIO
    # Context managers close both files even if the conversion raises.
    with open(fichier, "r") as handle, open('output.txt', 'w') as g:
        SeqIO.convert(handle, 'fastq', g, 'fasta')

コード例 #28

0

ファイルを表示

ファイル: __init__.py プロジェクト: Ivan-Castro/RNAtk-v0.2.0

def fas2clus(inFile):
    """
    Convert a fasta file to clustalw format and open the result.

    The clustal file is written next to the input as ``<inFile>cl``.
    Modules required:
    - SeqIO (from Bio)
    Usage: <file>

    Returns the clustal file opened for reading; the caller closes it.
    """
    clustal_path = inFile + 'cl'
    SeqIO.convert(inFile, 'fasta', clustal_path, 'clustal')
    return open(clustal_path)

コード例 #29

0

ファイルを表示

ファイル: gbk_to_fasta.py プロジェクト: bfrgoncalves/sequence_parsers

def main():
    """Command-line entry point: convert a GenBank file (-gbk) to FASTA (-o)."""
    parser = argparse.ArgumentParser(description="This program parses a .gbk file to a .fasta file")
    required_options = (
        ('-gbk', ".gbk file"),
        ('-o', "results file name"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, nargs='?', type=str, help=help_text, required=True)

    options = parser.parse_args()

    SeqIO.convert(options.gbk, "genbank", options.o, "fasta")

コード例 #30

0

ファイルを表示

ファイル: abi2fastq.py プロジェクト: psikon/misc-scripts

def convert2fastq(input, output):
    """Convert every ``.ab1`` file in ``input`` to FASTQ under ``<output>/tmp``.

    Args:
        input: directory that is scanned (non-recursively) for ``.ab1`` files.
        output: directory below which the ``tmp`` folder is created.

    Returns:
        The path of the ``tmp`` folder. The folder is only created when at
        least one ``.ab1`` file is found.
    """
    # Computed up front so the return value exists even without .ab1 files.
    tmp = output + os.sep + "tmp"
    for entry in os.listdir(input):
        if not entry.endswith(".ab1"):
            continue
        if not os.path.exists(tmp):
            os.makedirs(tmp)
        out = tmp + os.sep + str(entry).split('.')[0] + '.fastq'
        # Close each trace file as soon as it has been converted.
        with open(input + os.sep + entry, 'rb') as abi:
            SeqIO.convert(abi, 'abi', out, 'fastq')
    return tmp

コード例 #31

0

ファイルを表示

def check_convert(in_filename, in_format, out_format, alphabet=None):
    """Check that SeqIO.convert produces the same output as parse + write.

    Args:
        in_filename: path of the input file.
        in_format: SeqIO format name of the input.
        out_format: SeqIO format name to write.
        alphabet: optional alphabet passed through to SeqIO.

    Raises:
        AssertionError: if the two outputs differ (or the record comparison
            done by ``compare_records`` fails).
    """
    # Close the input handle once the records have been loaded.
    with open(in_filename) as in_handle:
        records = list(SeqIO.parse(in_handle, in_format, alphabet))
    # Write it out...
    handle = StringIO()
    qual_truncate = truncation_expected(out_format)
    # catch_warnings restores the filter list afterwards. simplefilter()
    # inserts at the front of the list, so filters.pop() (which drops the
    # last entry) would remove an unrelated filter.
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.write(records, handle, out_format)
    handle.seek(0)
    # Now load it back and check it agrees,
    records2 = list(SeqIO.parse(handle, out_format, alphabet))
    compare_records(records, records2, qual_truncate)
    # Finally, use the convert function, and check that agrees:
    handle2 = StringIO()
    with warnings.catch_warnings():
        if qual_truncate:
            warnings.simplefilter('ignore', UserWarning)
        SeqIO.convert(in_filename, in_format, handle2, out_format, alphabet)
    # We could re-parse this, but it is simpler and stricter:
    assert handle.getvalue() == handle2.getvalue()

コード例 #32

0

ファイルを表示

ファイル: ny_clean.py プロジェクト: carinalewandowski/covid-phylo

def prep_for_beast():
    """Rename the aligned NY records and export them as a NEXUS file.

    Each record's id and description are set to the second space-separated
    word of its description. The renamed records are written back as FASTA
    and that file is then converted to NEXUS.
    """
    records = list(SeqIO.parse('final_ny_aligned.txt', "fasta"))
    for rec in records:
        new_name = rec.description.split(" ")[1]
        rec.id = new_name
        rec.description = new_name

    SeqIO.write(records, 'final_ny_aligned_name_fixed.txt', "fasta")

    # print file to NEXUS file
    converted = SeqIO.convert(
        "final_ny_aligned_name_fixed.txt", "fasta",
        "final_ny_aligned.nex", "nexus",
        alphabet=IUPAC.ambiguous_dna)
    print("Converted %i records" % converted)

コード例 #33

0

ファイルを表示

ファイル: filter_outliers.py プロジェクト: sminot/deenurp

def distmat_cmalign(
        sequence_file,
        prefix,
        cpu=wrap.CMALIGN_THREADS,
        min_bitscore=10):
    """Align sequences with cmalign and return FastTree taxa and distances.

    Sequences whose bit score is below ``min_bitscore`` are only reported
    with a log warning. Returns the ``(taxa, distmat)`` pair produced by
    ``outliers.fasttree_dists``.
    """
    with util.ntf(prefix=prefix, suffix='.aln') as stockholm_tmp:
        with util.ntf(prefix=prefix, suffix='.fasta') as fasta_tmp:
            scores = wrap.cmalign_files(
                sequence_file, stockholm_tmp.name, cpu=cpu)

            below_cutoff = scores['bit_sc'] < min_bitscore
            if below_cutoff.any():
                msg = 'The following sequences aligned with bit score < {}: {}'
                log.warning(
                    msg.format(min_bitscore, scores[below_cutoff].index))

            # FastTree requires FASTA
            SeqIO.convert(stockholm_tmp, 'stockholm', fasta_tmp, 'fasta')
            fasta_tmp.flush()

            taxa, distmat = outliers.fasttree_dists(fasta_tmp.name)

    return taxa, distmat

コード例 #34

0

ファイルを表示

ファイル: MetaFile.py プロジェクト: lhudson08/seqDB_tools

    def to_fasta(self, rmFile=False):
        """Converting the read file to fasta.

        Args:
        rmFile -- remove old version of file?

        Attrib edit:
        readFile name set to new file (*.fasta)
        readFileFormat set to fasta format

        Return:
        True on success; False if SeqIO raised ValueError during conversion
        """
        # unpack
        readFile = self.get_readFile()
        readFileFormat = self.get_readFileFormat()

        # new file name
        basename, ext = os.path.splitext(readFile)
        ## rename file to prevent overwrite
        renamed = False
        if ext == '.fasta':
            os.rename(readFile, basename + '.tmp')
            readFile = basename + '.tmp'
            renamed = True
        newFile = basename + '.fasta'

        # convert
        try:
            SeqIO.convert(readFile, readFileFormat, newFile, 'fasta')
        except ValueError:
            if renamed:
                # The readFile attribute still points at the '.fasta' name,
                # so move the original back instead of leaving it as '.tmp'.
                if os.path.exists(newFile):
                    os.remove(newFile)
                os.rename(readFile, newFile)
            return False

        # remove
        if rmFile:
            os.remove(readFile)

        # setting attributes
        self.set_readFile(newFile)
        self.set_readFileFormat('fasta')

        return True

コード例 #35

0

ファイルを表示

def main():
    """Split every record of a FASTA file into overlapping windows.

    Reads ``--input``, cuts each record into fragments of ``--window``
    bases starting every ``--step`` bases, and writes the fragments to
    ``--output`` in FASTA format. Each fragment's identifier is its start
    offset within its source record.
    """
    ap = GooeyParser(
        description=
        "splits a fasta file with user specified length and fragment overlap")
    ap.add_argument("-in",
                    "--input",
                    required=True,
                    widget='FileChooser',
                    help="input fasta file")
    ap.add_argument("-step",
                    "--step",
                    required=True,
                    help="step size to split fasta, type = int")
    ap.add_argument("-win",
                    "--window",
                    required=True,
                    help="window size of splitted subsets, type = int")
    ap.add_argument("-out",
                    "--output",
                    required=True,
                    widget='FileSaver',
                    help="output fasta file")
    args = vars(ap.parse_args())

    # Parse the numeric options once instead of on every loop iteration.
    window = int(args['window'])
    step = int(args['step'])

    # Intermediate "id<TAB>sequence" table that SeqIO's "tab" parser reads.
    tab_file = "out.tab"
    try:
        # Mode 'w' (not 'a') so leftovers from an aborted run are discarded.
        with open(tab_file, 'w') as f:
            for record in SeqIO.parse(args['input'], "fasta"):
                for i in range(0, len(record.seq) - window + 1, step):
                    f.write("%s\t%s\n" % (i, record.seq[i:i + window]))

        # convert to fasta
        SeqIO.convert(tab_file, "tab", args['output'], "fasta")
    finally:
        # Portable clean-up (the "del" shell command exists only on Windows).
        if os.path.exists(tab_file):
            os.remove(tab_file)

コード例 #36

0

ファイルを表示

 def simple_check(self, base_name, in_variant):
     """Check FASTQ variant conversion against stored reference files.

     For each output variant (sanger, solexa, illumina) the file
     ``Quality/<base_name>_original_<in_variant>.fastq`` is converted twice,
     once with ``SeqIO.convert`` and once with ``SeqIO.parse`` +
     ``SeqIO.write``, and both results must equal the contents of
     ``Quality/<base_name>_as_<out_variant>.fastq``.
     """
     in_filename = "Quality/%s_original_%s.fastq" \
                   % (base_name, in_variant)
     for out_variant in ["sanger", "solexa", "illumina"]:
         # catch_warnings restores the filter list on exit, including when
         # an assertion fails. Popping warnings.filters by hand removed the
         # last entry, while simplefilter inserts at the front.
         with warnings.catch_warnings():
             if out_variant != "sanger":
                 # Ignore data loss warnings from max qualities
                 warnings.simplefilter('ignore', BiopythonWarning)
             self.assertTrue(os.path.isfile(in_filename))
             # Load the reference output...
             # NOTE(review): the "rU" mode was removed in Python 3.11; it is
             # kept here because the target Python version is not visible.
             with open("Quality/%s_as_%s.fastq" % (base_name, out_variant),
                       "rU") as handle:
                 expected = handle.read()
             # Check matches using convert...
             handle = StringIO()
             SeqIO.convert(in_filename, "fastq-" + in_variant, handle,
                           "fastq-" + out_variant)
             self.assertEqual(expected, handle.getvalue())
             # Check matches using parse/write
             handle = StringIO()
             SeqIO.write(SeqIO.parse(in_filename, "fastq-" + in_variant),
                         handle, "fastq-" + out_variant)
             self.assertEqual(expected, handle.getvalue())

コード例 #37

0

ファイルを表示

ファイル: ReadSanger.py プロジェクト: zhaiqt/parse_sanger

def read_sanger(infilepath,outfilepath):
    """Convert every ``.ab1`` trace file in a directory to FASTQ.

    Each file in ``infilepath`` whose name ends with ``.ab1`` is converted
    with ``SeqIO.convert`` and written to ``outfilepath`` under the same
    name with the extension replaced by ``.fastq``. The input directory and
    the number of converted files are printed. Returns None.
    """
    suffix = '.ab1'
    count_infile = 0
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(infilepath)
    for filename in os.listdir(infilepath):
        if not filename.endswith(suffix):
            continue
        abi_filename = os.path.join(infilepath, filename)
        # Replace only the trailing extension; str.replace would also
        # rewrite a '.ab1' that occurs earlier in the name.
        fastq_name = filename[:-len(suffix)] + '.fastq'
        fastq_filename = os.path.join(outfilepath, fastq_name)
        SeqIO.convert(abi_filename, 'abi', fastq_filename, 'fastq')
        count_infile += 1
    print("There are total %d sequences" % count_infile)

    return

コード例 #38

0

ファイルを表示

ファイル: similarity.py プロジェクト: dani2s/ReverSa

 def generate_dist(self):
     """Align the sequences, build a RAxML tree and export its distances.

     Runs MAFFT on ``self.fasta_seq`` with PHYLIP output, saves the
     alignment as ``testing/alignment.phy`` and ``testing/align.fasta``,
     runs RAxML on the PHYLIP file, then writes the phylogenetic distance
     matrix of ``testing/RAxML_result.reversatest`` to ``distance.csv``.
     """
     mafft_cline = MafftCommandline(input=self.fasta_seq,
                                    maxiterate=1000,
                                    localpair=True,
                                    phylipout=True)
     stdout, stderr = mafft_cline()
     # Save alignments into FASTA and PHYLIP format. The with-block closes
     # the file even if the write fails.
     phyFile = 'testing/alignment.phy'
     with open(phyFile, 'w') as outPhy:
         outPhy.write(stdout)
     fastaFile = 'testing/align.fasta'
     SeqIO.convert(phyFile, 'phylip', fastaFile, 'fasta')
     # Create phylogenetic tree of the original sequences
     raxml_cline = RaxmlCommandline(sequences=phyFile,
                                    model='GTRGAMMA',
                                    name='reversatest',
                                    working_dir=self.cwPath)
     raxml_cline()
     # Calculate the phylo distances between each branch of the tree
     # NOTE(review): the result is read from "testing/" while RAxML is given
     # working_dir=self.cwPath; presumably both name the same directory.
     tree = dendropy.Tree.get_from_path("testing/RAxML_result.reversatest",
                                        "newick")
     pdm = tree.phylogenetic_distance_matrix()
     pdm.write_csv('distance.csv')

コード例 #39

0

ファイルを表示

ファイル: test_SeqIO_convert.py プロジェクト: paultalent/biopython

 def check_conversion(self, filename, in_format, out_format, alphabet):
     """Verify parse/write and convert() agree for one format pair.

     The records of ``filename`` are written in ``out_format``, parsed back
     and compared with the originals; then ``SeqIO.convert`` is run on the
     same file and must produce exactly the same text as ``SeqIO.write``.
     """
     msg = "Convert %s from %s to %s" % (filename, in_format, out_format)
     originals = list(SeqIO.parse(filename, in_format, alphabet))
     written = StringIO()
     qual_truncate = truncation_expected(out_format)

     def run_quietly(action):
         # Silence Biopython warnings only when truncation is expected.
         with warnings.catch_warnings():
             if qual_truncate:
                 warnings.simplefilter("ignore", BiopythonWarning)
             return action()

     # Write it out...
     run_quietly(lambda: SeqIO.write(originals, written, out_format))
     written.seek(0)
     # ...then load it back and check it agrees.
     reparsed = list(SeqIO.parse(written, out_format, alphabet))
     self.assertEqual(len(originals), len(reparsed), msg=msg)
     for before, after in zip(originals, reparsed):
         self.compare_record(before, after, qual_truncate, msg=msg)
     # Finally, use the convert function, and check that agrees:
     converted = StringIO()
     run_quietly(lambda: SeqIO.convert(filename, in_format, converted,
                                       out_format, alphabet))
     # Comparing the raw text is simpler and stricter than re-parsing.
     self.assertEqual(written.getvalue(), converted.getvalue(), msg=msg)

コード例 #40

0

ファイルを表示

def ficheirosProteinas(dicionario):
    """Fetch protein records from Entrez and save them as GenBank and FASTA.

    Each key of ``dicionario`` is used as the ``id`` of an Entrez protein
    query. Record number ``i`` (in iteration order) is saved as
    ``sequenceProtGenbank<i>.gb`` and converted to
    ``sequenceProtF<i>.fasta``.

    Returns the list of GenBank file names that were written.
    """
    Entrez.email = "*****@*****.**"

    lista_ficheiros = []

    for i, key in enumerate(dicionario):
        nome_ficheiro = 'sequenceProtGenbank' + str(i) + '.gb'
        handleGB = Entrez.efetch(db="protein",
                                 rettype="gb",
                                 retmode="text",
                                 id=key)
        try:
            seq_record = SeqIO.read(handleGB, "genbank")
        finally:
            # Release the connection even when the record cannot be parsed.
            handleGB.close()

        # Save in GenBank format.
        SeqIO.write(seq_record, nome_ficheiro, "genbank")
        lista_ficheiros.append(nome_ficheiro)

        SeqIO.convert(nome_ficheiro, "genbank",
                      'sequenceProtF' + str(i) + '.fasta', "fasta")

    return lista_ficheiros

コード例 #41

0

ファイルを表示

def groom(in_file, in_qual="fastq-sanger", out_dir=None, out_file=None):
    """
    Convert a FASTQ file to Sanger quality encoding unless it already is.

    Pass ``fastq-illumina`` as ``in_qual`` for Illumina 1.3-1.7 qualities
    and ``fastq-solexa`` for the original Solexa qualities. When in doubt,
    your sequences are probably ``fastq-sanger``.

    ``out_file`` is returned in every case, including the already-Sanger
    case where nothing is written. ``out_dir`` is accepted but not used by
    this function.
    """
    if in_qual != "fastq-sanger":
        with file_transaction(out_file) as tmp_out_file:
            n_reads = SeqIO.convert(in_file, in_qual, tmp_out_file,
                                    "fastq-sanger")
        logger.info("Converted %d reads in %s to %s."
                    % (n_reads, in_file, out_file))
    else:
        logger.info("%s is already in Sanger format." % (in_file))
    return out_file

コード例 #42

0

ファイルを表示

 def check(self, sff_name, sff_format, out_name, format):
     """Convert an SFF file and compare it with a reference file.

     ``sff_name`` is converted from ``sff_format`` to ``format`` in memory;
     the record count, ids and names must match those parsed from
     ``out_name``. Sequences are compared for every format except "qual",
     and PHRED qualities for every format except "fasta".
     """
     wanted = list(SeqIO.parse(out_name, format))
     data = StringIO()
     count = SeqIO.convert(sff_name, sff_format, data, format)
     self.assertEqual(count, len(wanted))
     data.seek(0)
     converted = list(SeqIO.parse(data, format))
     self.assertEqual(len(wanted), len(converted))
     for old, new in zip(wanted, converted):
         self.assertEqual(old.id, new.id)
         self.assertEqual(old.name, new.name)
         if format != "qual":
             self.assertEqual(str(old.seq), str(new.seq))
         # Independent check (previously an ``elif``, which only ran for
         # "qual" and so never compared qualities for FASTQ output).
         if format != "fasta":
             self.assertEqual(old.letter_annotations["phred_quality"],
                              new.letter_annotations["phred_quality"])

コード例 #43

0

ファイルを表示

def countKmerMatch(fas, kmer_list, outfile, is_fastq):
	"""Flag the reads that contain at least one of the listed k-mers.

	Args:
		fas: input read file, FASTQ when ``is_fastq`` is true, else FASTA.
		kmer_list: text file with one marker sequence per line.
		outfile: output prefix; matching reads go to ``<outfile>.fasta``
			and, for FASTQ input, also to ``<outfile>.fastq``.
		is_fastq: whether ``fas`` is FASTQ.

	Returns:
		``(match, total_count)`` where ``match`` holds '1' or '0' for each
		read in input order and ``total_count`` is the number of matches.
	"""
	in_format = "fastq" if is_fastq else "fasta"

	with open(kmer_list, 'r') as fin:
		# Build each Seq once instead of once per read. Blank lines are
		# skipped because an empty pattern would match every read.
		kmers = [Seq(line) for line in fin.read().splitlines() if line.strip()]

	# Count records in the file's real format (the old code always parsed
	# the input as FASTQ, which fails on FASTA input) and without holding a
	# converted copy of the whole file in memory.
	num_elem = sum(1 for _ in SeqIO.parse(fas, in_format))
	bar = progressbar.ProgressBar(redirect_stdout=True, max_value=num_elem)

	match = []
	total_count = 0
	output_fastq_handle = open(outfile + '.fastq', "w") if is_fastq else None
	try:
		with open(outfile + '.fasta', "w") as output_fasta_handle:
			for i, record in enumerate(SeqIO.parse(fas, in_format), 1):
				bar.update(i)
				read = record.seq
				if any(read.count(kmer) > 0 for kmer in kmers):
					print("marker found at read number %d" %(i))
					match.append('1')
					if output_fastq_handle is not None:
						SeqIO.write(record, output_fastq_handle, "fastq")
					SeqIO.write(record, output_fasta_handle, "fasta")
					total_count = total_count + 1
				else:
					match.append('0')
	finally:
		if output_fastq_handle is not None:
			output_fastq_handle.close()
	return match, total_count

コード例 #44

0

ファイルを表示

ファイル: fasta_to_tab_gui.py プロジェクト: olgatsiouri1996/bioinfo_gui_scripts

def main():
    """Convert the FASTA file chosen in the GUI into SeqIO's "tab" format."""
    parser = GooeyParser(
        description=("converts a fasta file , into a tabular file with "
                     "identifier and sequence"))
    parser.add_argument(
        "-in", "--input",
        required=True,
        widget='FileChooser',
        help="input fasta file",
    )
    parser.add_argument(
        "-out", "--output",
        required=True,
        widget='FileSaver',
        help="output tab seperated file",
    )
    options = vars(parser.parse_args())

    # One call does the whole job: FASTA in, tab-separated table out.
    SeqIO.convert(options['input'], "fasta", options['output'], "tab")

コード例 #45

0

ファイルを表示

ファイル: get_core2.py プロジェクト: erprateek/Phylogenetic-Pipeline

def gb_to_fasta(dirpath):
    """Convert the GenBank files found in ``dirpath`` to FASTA.

    Every entry of ``dirpath`` whose name ends with "gb" or "gbk" is
    converted to ``concat-<name>.fas`` in the current working directory,
    and each FASTA header line in the result is replaced by
    ``>`` followed by the source file name.
    """
    for filename in os.listdir(dirpath):
        if not filename.endswith(("gb", "gbk")):
            continue
        outname = 'concat-' + filename + '.fas'
        # os.listdir returns bare names, so the input must be joined with
        # dirpath; otherwise it is only found when dirpath is the cwd.
        SeqIO.convert(os.path.join(dirpath, filename), "genbank",
                      outname, "fasta")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Genbank file found - " + filename)
        print("Generating " + outname + " ...Done!")

        # Rewrite the file with each header replaced by the file name.
        with open(outname) as f:
            lines = f.readlines()
        with open(outname, "w") as f:
            for line in lines:
                if line.startswith(">"):
                    line = ">" + filename + "\n"
                f.write(line)

コード例 #46

0

ファイルを表示

def main():
    """Convert the PDB file chosen in the GUI to FASTA via "pdb-atom"."""
    parser = GooeyParser(
        description=("converts a pdb file with only the atom coordinates "
                     "section, into a fasta file"))
    parser.add_argument(
        "-in", "--input",
        required=True,
        widget='FileChooser',
        help="input pdb file without SEQRES header",
    )
    parser.add_argument(
        "-out", "--output",
        required=True,
        widget='FileSaver',
        help="output fasta file",
    )
    options = vars(parser.parse_args())

    # "pdb-atom" is the input format name passed to SeqIO for this file.
    SeqIO.convert(options['input'], "pdb-atom", options['output'], "fasta")

コード例 #47

0

ファイルを表示

ファイル: SeqHandler.py プロジェクト: mscook/seqhandler

def convertMod(args):
    """Convert ``args.input`` from ``args.inFormat`` to ``args.outFormat``.

    GFF output is delegated to ``to_GFF`` and is only accepted for GenBank
    or EMBL input; every other target format goes through
    ``SeqIO.convert``. Problems are reported on stderr rather than raised.

    Always returns None.
    """
    if args.outFormat.lower() == 'gff':
        acceptable = ['genbank', 'embl']
        if args.inFormat.lower() in acceptable:
            to_GFF(args)
        else:
            # sys.stderr, not the non-existent sys.err, which raised
            # AttributeError instead of printing the message.
            sys.stderr.write("ERROR: ValueError, Could not convert file\n")
        return None

    try:
        count = SeqIO.convert(args.input, args.inFormat, args.output,
                              args.outFormat)
    except ValueError:
        sys.stderr.write("ERROR: ValueError, Could not convert file\n")
        return None

    if count == 0:
        sys.stderr.write('ERROR: No records converted. Possibly wrong input filetype\n')
    elif args.verbose:
        sys.stderr.write("Converted %i records\n" % count)
    return None

コード例 #48

0

ファイルを表示

ファイル: 1.0.py プロジェクト: yanmc/macaca-antibodyomics

def main():
	"""Run the read-processing pipeline from the current working directory.

	Steps, in order: quality-trim the FASTQ files under ``1.0-origin``,
	merge the trimmed pair with ``pear``, convert the merged FASTQ to FASTA,
	call ``unique_fasta``, split the unique FASTA into batch files, and
	launch one ``igblastn`` shell command per batch file.

	NOTE(review): ``project`` is used throughout but never assigned in this
	function; presumably it is a module-level name -- confirm.
	"""
	print "Begin!"
	prj_folder = os.getcwd()	
	print "Quality contorl..."
	infiles = glob.glob("%s/1.0-origin/*.fastq"%(prj_folder))
	# NOTE(review): a wrong file count is only reported; execution continues.
	if len(infiles) != 2:
		print "The %s be loaded in error, are they two?"%infiles
	for the_file in infiles:
		trim_fastq_by_quality(the_file,prj_folder)	
	print "Merging..."	
	infiles = glob.glob("%s/1.1-trimed-fastq-file/*.fastq"%(prj_folder))
	
	os.chdir("%s/1.2-merged-fastq-file/"%(prj_folder))
	# NOTE(review): infiles[0] is passed as forward (-f) and infiles[1] as
	# reverse (-r); glob does not promise an order, so confirm which is which.
	merge = subprocess.call("pear -f %s -r %s -o %s"%(infiles[0],infiles[1],project),shell=True)
	
	print "Convert fastq to fasta..."
	merged_file = "%s/1.2-merged-fastq-file/%s.assembled.fastq"%(prj_folder, project)
	fname, suffix = os.path.splitext(merged_file)
	count = SeqIO.convert(merged_file, "fastq","%s.fasta"%fname, "fasta")
	print count
	print "There are  %i records have been Converted!" %(count)
	
	print "Unique fasta file..."
	unique_fasta(prj_folder, project)
	
	print "Split large file to small..."
	os.chdir("%s/1.3-splited-fasta-file/"%(prj_folder))
	# Reads "<fname>_unique.fasta"; presumably written by unique_fasta above.
	record_iter = SeqIO.parse(open("%s_unique.fasta"%fname), "fasta")
	# Batch files are numbered from 1 and written into the current directory.
	for i, batch in enumerate(batch_iterator(record_iter, 10000)) :
		filename = "%s-%i.fasta" % (project, i+1)
		handle = open(filename, "w")
		count = SeqIO.write(batch, handle, "fasta")
		handle.close()
		print "Wrote %i records to %s" % (count, filename)
	print "Begin IgBLAST..."
	os.chdir("%s/1.4-IgBLAST-output/"%(prj_folder))
	# NOTE(review): this file name ends with a space, and the handle is
	# neither written to nor closed in this function.
	IgBLAST_result = open("%s-igblast-output.txt "%(project),"w")	
	# Copies the IgBLAST database directory into the output directory.
	mv_database = subprocess.call("cp -r /zzh_gpfs/home/zzhgroup/yanmingchen/IgBLAST_database/ ./",shell = True)
	#IgBLAST_run = subprocess.call("igblastn -germline_db_V ./IgBLAST_database/20150429-human-gl-v -germline_db_J ./IgBLAST_database/20150429-human-gl-j -germline_db_D ./IgBLAST_database/20150429-human-gl-d -organism human -domain_system imgt -query %s -auxiliary_data optional_file/human_gl.aux -outfmt '7 qseqid sseqid pident length mismatch gapopen gaps qstart qend sstart send evalue bitscore qlen slen qseq sseq score frames qframe sframe positive ppos btop staxids stitle sstrand qcovs qcovhsp' -num_alignments_V 10 -num_alignments_D 10 -num_alignments_J 10 -out IgBLAST_result"%merged_file,shell=True)
	IGBLAST_infiles = glob.glob("%s/1.3-splited-fasta-file/*.fasta"%(prj_folder))
	# Each command string ends with "&", so the shell starts igblastn in the
	# background and subprocess.call returns without waiting for it.
	for index, the_file in enumerate(IGBLAST_infiles):
		print index,the_file
		IgBLAST_run = subprocess.call("igblastn -germline_db_V ./IgBLAST_database/20150429-human-gl-v -germline_db_J ./IgBLAST_database/20150429-human-gl-j -germline_db_D ./IgBLAST_database/20150429-human-gl-d -organism human -domain_system imgt -query %s -auxiliary_data optional_file/human_gl.aux -outfmt '7 qseqid sseqid pident length mismatch gapopen gaps qstart qend sstart send evalue bitscore qlen slen qseq sseq score frames qframe sframe positive ppos btop staxids stitle sstrand qcovs qcovhsp' -num_alignments_V 10 -num_alignments_D 10 -num_alignments_J 10 -out IgBLAST_result_%i &"%(the_file,index+1),shell=True)

コード例 #49

0

ファイルを表示

def main():
    """Command-line entry point: convert a GenBank file to EMBL.

    Parses ``--genbank`` and ``--embl``, creates the output directory when
    it does not exist, converts the file with ``SeqIO.convert`` and prints
    the number of converted records. Exits early when run on Python 2.
    """
    if sys.version_info[0] < 3:
        sys.exit(
            'Must be using Python 3. Try calling "python3 genbank_2_embl.py"')

    parser = argparse.ArgumentParser(
        prog='genbank_2_embl.py',
        description='Converts a genbank file into a embl file',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version',
                        help='Version information',
                        action='version',
                        version='%(prog)s v' + version)

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-g',
                                 '--genbank',
                                 nargs=1,
                                 type=argparse.FileType('r'),
                                 required=True,
                                 metavar='/path/to/genbank/file.gb',
                                 help='Path to the genbank file')
    parser_required.add_argument('-e',
                                 '--embl',
                                 nargs=1,
                                 type=str,
                                 metavar='/path/to/output/embl/file.embl',
                                 help='Path to the output embl file',
                                 required=True)

    args = parser.parse_args()

    # argparse.FileType opened the input file; only its path is used below,
    # so close the handle instead of leaving it open for the whole run.
    genbank_handle = args.genbank[0]
    args.genbank = os.path.abspath(genbank_handle.name)
    if genbank_handle is not sys.stdin:
        genbank_handle.close()
    args.embl = os.path.abspath(args.embl[0])

    embl_dir = os.path.dirname(args.embl)
    if not os.path.isdir(embl_dir):
        os.makedirs(embl_dir)

    count = SeqIO.convert(args.genbank, 'genbank', args.embl, 'embl')
    print('Converted {} records'.format(count))

コード例 #50

0

ファイルを表示

def readwrite_fasta(infilename, outfilename):
    """Convert a (possibly compressed) Sanger FASTQ file to FASTA.

    The input is opened according to ``compression_type`` (a name this
    function reads but does not define): gzip, bzip2, a zip archive with
    exactly one member, or a plain file otherwise.

    Returns the number of converted records, or None after printing the
    error if anything goes wrong.
    """
    myzipfile = None
    infile = None
    outfile = None
    try:
        if compression_type in ["gzip", "gz"]:
            infile = gzip.open(infilename, 'r')
        elif compression_type in ["bzip2", "bz2"]:
            infile = bz2.BZ2File(infilename, 'r')
        elif compression_type == "zip":
            myzipfile = zipfile.ZipFile(infilename, 'r')
            members = myzipfile.namelist()
            if len(members) > 1:
                raise IOError("TOO MANY FILES IN ZIPFILE")
            infile = myzipfile.open(members[0])
        else:
            infile = open(infilename, 'r')
        outfile = open(outfilename, "w")
        return SeqIO.convert(infile, "fastq-sanger", outfile, "fasta")
    except Exception as ex:
        # Best-effort contract kept from the original: report, return None.
        print(ex.__class__.__name__ + " : " + str(ex))
        return None
    finally:
        # Close whatever was opened, including on the error path.
        for handle in (outfile, infile, myzipfile):
            if handle is not None:
                handle.close()

コード例 #51

0

ファイルを表示

ファイル: dataio.py プロジェクト: joaks1/SeqSift

def convert_format(in_file,
                   out_file,
                   in_format=None,
                   out_format=None,
                   data_type='dna',
                   ambiguities=True):
    """Convert a sequence file from one format to another.

    When ``in_format`` or ``out_format`` is None it is looked up with
    ``FILE_FORMATS.get_format_from_file_object`` on the matching file
    argument. ``data_type`` and ``ambiguities`` are passed to
    ``get_state_alphabet`` to obtain the alphabet given to
    ``SeqIO.convert``.

    Returns the value returned by ``SeqIO.convert``.
    """
    # Identity comparison is the idiomatic (PEP 8) test for None.
    if in_format is None:
        in_format = FILE_FORMATS.get_format_from_file_object(in_file)
    if out_format is None:
        out_format = FILE_FORMATS.get_format_from_file_object(out_file)
    _LOG.debug("converting {in_format}-formatted file {in_file!r} to "
               "{out_format}-formatted file {out_file!r}.".format(
                   in_file=in_file,
                   in_format=in_format,
                   out_file=out_file,
                   out_format=out_format))
    nseqs = SeqIO.convert(in_file=in_file,
                          in_format=in_format,
                          out_file=out_file,
                          out_format=out_format,
                          alphabet=get_state_alphabet(data_type, ambiguities))
    return nseqs

コード例 #52

0

ファイルを表示

ファイル: test_SeqIO_QualityIO.py プロジェクト: bugra-emanet/biopython-bugra-emanet

    def test_qual_negative(self):
        """Check QUAL negative scores mapped to PHRED zero."""
        # Four QUAL records whose score lists contain -1 entries.
        data = """>1117_10_107_F3
23 31 -1 -1 -1 29 -1 -1 20 32 -1 18 25 7 -1 6 -1 -1 -1 30 -1 20 13 7 -1 -1 21 30 -1 24 -1 22 -1 -1 22 14 -1 12 26 21 -1 5 -1 -1 -1 20 -1 -1 12 28
>1117_10_146_F3
20 33 -1 -1 -1 29 -1 -1 28 28 -1 7 16 5 -1 30 -1 -1 -1 14 -1 4 13 4 -1 -1 11 13 -1 5 -1 7 -1 -1 10 16 -1 4 12 15 -1 8 -1 -1 -1 16 -1 -1 10 4
>1117_10_1017_F3
33 33 -1 -1 -1 27 -1 -1 17 16 -1 28 24 11 -1 6 -1 -1 -1 29 -1 8 29 24 -1 -1 8 8 -1 20 -1 13 -1 -1 8 13 -1 28 10 24 -1 10 -1 -1 -1 4 -1 -1 7 6
>1117_11_136_F3
16 22 -1 -1 -1 33 -1 -1 30 27 -1 27 28 32 -1 29 -1 -1 -1 27 -1 18 9 6 -1 -1 23 16 -1 26 -1 5 7 -1 22 7 -1 18 14 8 -1 8 -1 -1 -1 11 -1 -1 4 24"""  # noqa : W291
        h = StringIO(data)
        h2 = StringIO()
        # Parser warnings are silenced for the conversion; all four records
        # must be converted.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", BiopythonParserWarning)
            self.assertEqual(4, SeqIO.convert(h, "qual", h2, "fastq"))
        # In the expected FASTQ every sequence line consists of '?' and each
        # -1 score appears as '!' in the quality line.
        self.assertEqual(
            h2.getvalue(),
            """\
@1117_10_107_F3
??????????????????????????????????????????????????
+
8@!!!>!!5A!3:(!'!!!?!5.(!!6?!9!7!!7/!-;6!&!!!5!!-=
@1117_10_146_F3
??????????????????????????????????????????????????
+
5B!!!>!!==!(1&!?!!!/!%.%!!,.!&!(!!+1!%-0!)!!!1!!+%
@1117_10_1017_F3
??????????????????????????????????????????????????
+
BB!!!<!!21!=9,!'!!!>!)>9!!))!5!.!!).!=+9!+!!!%!!('
@1117_11_136_F3
??????????????????????????????????????????????????
+
17!!!B!!?<!<=A!>!!!<!3*'!!81!;!&(!7(!3/)!)!!!,!!%9
""",
        )

コード例 #53

0

ファイルを表示

ファイル: gb2genome.py プロジェクト: yyxql/bcbio-nextgen

                print >>out_handle, gtf_gene
                gtf_tx = '%s\t%s\t%s\t%s\t%s\t.\t%s\t.\t%s;' % ( acc, source, 'transcript', f.location.start.position+1, f.location.end.position, strand, comments )
                print >>out_handle, gtf_tx
                comments += '; exon_number "1"'
                gtf_exon = '%s\t%s\t%s\t%s\t%s\t.\t%s\t.\t%s;' % ( acc, source, 'exon', f.location.start.position+1, f.location.end.position, strand, comments )
                print >>out_handle, gtf_exon

            sys.stderr.write( "%s\tSkipped %s entries having types: %s.\n" % ( gb.id,skipped, ', '.join(skippedTypes) ) )

# Script entry point: writes <prefix>.fa and <prefix>.gtf from a GenBank file
# and reports the elapsed time on stderr.
if __name__=='__main__':
    description = ("Convert GeneBank files to FASTA and GTF bcbio ready files.")

    parser = ArgumentParser(description=description)
    parser.add_argument("--gbk", required=True, help="GBK files")
    parser.add_argument("--prefix", required=True, help="prefix")
    args = parser.parse_args()

    t0=datetime.now()

    # Convert to an intermediate FASTA first, then copy it to <prefix>.fa.
    # NOTE(review): "<prefix>_tmp.fa" is not removed in the code shown here.
    count = SeqIO.convert(args.gbk, "genbank", args.prefix + "_tmp.fa", "fasta")
    with open(args.prefix + ".fa", "w") as out_handle:
        with open(args.prefix + "_tmp.fa") as in_handle:
            # Only the first line is cut down to its first whitespace-
            # separated token; all later lines are copied after strip().
            # NOTE(review): next() raises StopIteration on an empty file.
            header = next(in_handle)
            print >>out_handle, header.split()[0]
            for line in in_handle:
                print >>out_handle, line.strip()

    gb2gtf(args.gbk, args.prefix + ".gtf")
    dt=datetime.now() - t0
    sys.stderr.write( "#Time elapsed: %s\n" % dt )

コード例 #54

0

ファイルを表示

ファイル: fastq-fasta.py プロジェクト: connor-lab/fieldbioinformatics

#!/usr/bin/env python

from Bio import SeqIO
import sys

# Usage: fastq-fasta.py <input.fastq> <output.fasta>
fastq_path = sys.argv[1]
fasta_path = sys.argv[2]
SeqIO.convert(fastq_path, "fastq", fasta_path, "fasta")

コード例 #55

0

ファイルを表示

def converter(fq):
    """Convert FASTQ file ``fq`` to FASTA, swapping its extension for .fa."""
    stem, _old_ext = os.path.splitext(fq)
    SeqIO.convert(fq, 'fastq', stem + '.fa', 'fasta')

コード例 #56

0

ファイルを表示

ファイル: reannotate2.py プロジェクト: CamEJ/open_utils

          len(genbankdf[genbankdf.locus_tag == 'none']))
    genbankdf = genbankdf[genbankdf.locus_tag != "none"]

genbankdf.reset_index(level=0, inplace=True)
# Fields are split on pipes ("|") later on, so replace any pipe that is
# already present in the locus_tag, db_xref and old_locus_tag values.
# Each column is handled on its own and skipped when absent: the previous
# "except KeyError" never fired for a missing column (attribute access
# raises AttributeError) and one missing column would have skipped the rest.
for _pipe_col in ("db_xref", "locus_tag", "old_locus_tag"):
    if _pipe_col in genbankdf.columns:
        # regex=False: "|" must be treated as a literal character.
        genbankdf[_pipe_col] = genbankdf[_pipe_col].str.replace(
            "|", "_", regex=False)
#%%#%%#  make a fasta for genomic sequence(s), clean up header in resulting file
with open(os.path.join(subdirname, gb + "_genomic.fasta"),
          "w") as output_genfasta_handle:
    SeqIO.convert(input_handle.name, "genbank", output_genfasta_handle, "fasta")
#%%

# Optional step: list the record ids of the input genome and print them.
if rename_fa:
    print("After running this script, run the following output as a command" +
          ", starting with 'awk' and ending with _renamed.fasta:")
    import string
    # NOTE(review): this handle is not closed in the code shown here.
    input_handle = open(input_genome, "r")  # input database
    names = []
    for record in SeqIO.parse(input_handle, "genbank"):
        print(record.id)
        names.append(record.id)
    if len(names) > 1:
        # Last character of the first record id.
        starts_at = names[0][-1]
        # removes number, period, and version number

コード例 #57

0

ファイルを表示

ファイル: cdr3_pipeline.py プロジェクト: csakis/abmining

    while_condition = False
  else:
    qwindow_average = int(qwindow_average)
  if qwindow_average > 40 or qwindow_average < 0:
    print 'You entered a wrong value.'
    print 'Try again!'
  else:
    while_condition = False

# SFF sequence extraction: an .sff input is first converted to FASTQ using
# the "sff-trim" input format; the FASTQ is named after sample_name.
if file_extension == 'sff':  # extract the sequences from the sff file
  fastq_file = sample_name + '.fastq'
  stars()
  print 'The sff file is now being converted into a fastq file.'
  print 'This could take a while...'
  sff_seq_count = SeqIO.convert(raw_file_name, "sff-trim", fastq_file, "fastq")
  print '%i sequences have been converted to fastq format.' % sff_seq_count
  print '%s file has been created' % fastq_file
  stars()

# A FASTQ input is used directly.
# NOTE(review): fastq_file is only assigned for 'sff' and 'fastq' extensions
# in the code shown here.
if file_extension == 'fastq':
  fastq_file = raw_file_name

# *** DNA sequence quality trimming ***
stars()
print 'Now we trim the sequences using the desired quality settings.'
print 'Depending on the settings, this could take a while... (10-20 minutes)'
print 'Please be patient!'
stars()
# Name of the trimmed FASTA output and the list initialised for good reads.
trim_fasta_file_name = sample_name + '.trim.fasta'
good_reads = []

コード例 #58

0

ファイルを表示

ファイル: rosalind_tfsq_5.py プロジェクト: aakibinesar/Rosalind

from Bio import SeqIO

# Read the FASTQ dataset and write it back out as FASTA.
with open('rosalind_tfsq.txt') as fastq_in:
    with open('output.txt', 'w') as fasta_out:
        SeqIO.convert(fastq_in, 'fastq', fasta_out, 'fasta')

コード例 #59

0

ファイルを表示

ファイル: genbank2fasta.py プロジェクト: wangdi2014/bioconvert

 def _method_biopython(self, *args, **kwargs):
     """Convert ``self.infile`` (GenBank) to ``self.outfile`` (FASTA).

     Extra positional and keyword arguments are accepted and ignored.
     """
     # Imported here, inside the method, exactly as in the original code.
     from Bio import SeqIO
     source, target = self.infile, self.outfile
     SeqIO.convert(source, "genbank", target, "fasta")

コード例 #60

0

ファイルを表示

                    help='''Generate qual file.''')

parser.add_argument(
    '--out',
    '-o',
    type=str,
    default=False,
    help=
    '''Output prefix. Default: same as fastq. Use "stdout" or "-" to print to screen.'''
)

args = parser.parse_args()

# At least one output format must be requested. parser.error is used rather
# than assert so the check is not stripped when Python runs with -O.
if not (args.fa or args.qual):
    parser.error("nothing to do: request FASTA and/or QUAL output")

# Default prefix: the input file name without directory and extensions.
if not args.out:
    args.out = args.fastq.split("/")[-1].split(".")[0]

if args.fastq in ('', '-', 'stdin'):
    args.fastq = sys.stdin

# ``out`` keeps its original meaning; ``qual_out`` is separate so that asking
# for both formats no longer writes the QUAL data over the ".fasta" file.
if args.out and args.out in ("stdout", "-"):
    out = qual_out = sys.stdout
else:
    out = args.out + ".fasta" if args.fa else args.out + ".qual"
    qual_out = args.out + ".qual"

# NOTE(review): when reading from stdin and both formats are requested, the
# second conversion finds the stream already consumed.
if args.fa:
    SeqIO.convert(args.fastq, "fastq", out, "fasta")
if args.qual:
    SeqIO.convert(args.fastq, "fastq", qual_out, "qual")