Esempio n. 1
0
def file_shuffler(infwd, inrev, outfile):
    """Interleave the records of two read files into a single output file.

    Writes one record from ``infwd``, then one record from ``inrev``, and
    repeats until ``infwd`` is exhausted.  A record is taken to be a fixed
    number of lines (2 for fasta, 4 for fastq), so sequences wrapped over
    several lines are not handled.

    The file type is detected from ``infwd`` only; ``inrev`` is read with
    the same record size.  Any lines left in ``inrev`` after ``infwd`` runs
    out are not written.

    Args:
        infwd: name of the forward reads file.
        inrev: name of the reverse reads file.
        outfile: name of the interleaved output file.

    Exits (via ``sys.exit``) if the type of ``infwd`` is not recognised.
    """
    filetype = fasta_or_fastq(infwd)
    if filetype == 'unknown':
        sys.exit('File type not recognised: ' + infwd)

    # Number of lines that make up one record in each format.
    lineskip = {'fasta': 2, 'fastq': 4}[filetype]

    f_infwd = mh12_utils.open_file_read(infwd)
    f_inrev = mh12_utils.open_file_read(inrev)
    f_out = mh12_utils.open_file_write(outfile)

    # try/finally so the handles are released even if a read/write fails.
    try:
        line_fwd = f_infwd.readline()
        line_rev = f_inrev.readline()

        # The forward file alone decides when to stop.
        while line_fwd:
            for _ in range(lineskip):
                f_out.write(line_fwd)
                line_fwd = f_infwd.readline()

            for _ in range(lineskip):
                f_out.write(line_rev)
                line_rev = f_inrev.readline()
    finally:
        f_infwd.close()
        f_inrev.close()
        f_out.close()
Esempio n. 2
0
    def write_file(fname, a):
        """Write every item of ``a`` to the file ``fname``, one per line.

        Each item is written with the ``print`` statement, i.e. its string
        form followed by a newline.
        """
        f = mh12_utils.open_file_write(fname)

        # Close the handle even if iterating or printing an item raises.
        try:
            for seq in a:
                print >> f, seq
        finally:
            f.close()
Esempio n. 3
0
def fastn2subset(infile, ids, outfile, complement=False, start_only=False):
    """Copy the sequences whose id is (or is not) in ``ids`` to ``outfile``.

    Args:
        infile: name of the fasta/fastq file to read.
        ids: container tested with ``in`` for each sequence id.  A set or
            dict keeps each lookup cheap on large inputs.
        outfile: name of the file to write.
        complement: if true, write the sequences whose id is NOT in ``ids``.
        start_only: controls trimming of each id before the lookup.
            ``False`` (default) leaves ids untouched; ``''`` keeps the text
            before the first run of whitespace; any other value is used as
            the separator and the text before its first occurrence is kept.

    The trimmed id replaces ``seq.id``, so it is also the id that is
    written out.

    Exits (via ``sys.exit``) if the type of ``infile`` is not recognised.
    """
    filetype = fasta_or_fastq(infile)

    if filetype == 'unknown':
        sys.exit('File ' + infile + ' not recognised as a fasta/q')

    f_in = mh12_utils.open_file_read(infile)
    f_out = mh12_utils.open_file_write(outfile)

    # try/finally so both handles are released if parsing or writing fails.
    try:
        while True:
            seq = get_next_seq_from_file(f_in, filetype)

            if not seq:
                break

            if start_only == '':
                seq.id = seq.id.split()[0]
            elif start_only != False:
                # Equality (not identity) test kept on purpose: it is the
                # original contract, under which 0 also disables trimming.
                seq.id = seq.id.split(start_only)[0]

            # Keep members when complement is false, non-members otherwise.
            if (seq.id in ids) != bool(complement):
                print >> f_out, seq
    finally:
        f_in.close()
        f_out.close()
Esempio n. 4
0
def fastn_splitter(fname, outprefix):
    """Write every sequence of ``fname`` to its own file.

    Each output file is named ``outprefix`` + the sequence id (spaces
    replaced by underscores, first character removed) + ``'.'`` + the
    detected file type.

    Args:
        fname: name of the fasta/fastq file to split.
        outprefix: string prepended to every output file name.

    Returns:
        List of the output file names, in the order they were written.

    Exits (via ``sys.exit``) if the type of ``fname`` is not recognised.
    """
    file_list = []
    filetype = fasta_or_fastq(fname)
    if filetype == 'unknown':
        sys.exit('Unknown file format of ' + fname +
                 ' in method fastn.fastn_splitter')

    f_in = mh12_utils.open_file_read(fname)

    # try/finally so the input handle is released if anything below fails.
    try:
        # loop through file, writing each sequence to new file
        while True:
            seq = get_next_seq_from_file(f_in, filetype)

            if not seq:
                break

            # NOTE(review): [1:] drops the first character of the id --
            # presumably a leading '>' or '@' marker; confirm against the
            # sequence class.
            outname = outprefix + seq.id.replace(' ', '_')[1:] + '.' + filetype
            f_out = mh12_utils.open_file_write(outname)
            try:
                print >> f_out, seq
            finally:
                f_out.close()
            file_list.append(outname)
    finally:
        f_in.close()

    return file_list
Esempio n. 5
0
def fasta2singleLine(infile, outfile):
    """Rewrite a fasta (or fasta.qual) file with one line per record body.

    Header lines (starting with '>') are copied as they are.  All other
    non-blank lines that follow a header are joined onto a single line.
    If the second line of the file starts with a digit the file is treated
    as a quality file and a space is written after each joined line, so the
    numbers stay separated.

    If ``infile`` and ``outfile`` are the same name, the result is written
    to a temporary file next to it, which is then renamed over ``infile``.

    A final newline is always written, even when the input has no lines.

    Args:
        infile: name of the file to read.
        outfile: name of the file to write; may equal ``infile``.

    Exits (via ``sys.exit``) if the temporary file name already exists.
    """
    overwrite = infile == outfile

    if overwrite:
        outfile = outfile + '-tmp-' + str(random.randint(0, 1000000))
        # Keep the .gz suffix last on the temporary name.
        # NOTE(review): presumably the open helpers choose compression from
        # the extension -- confirm in mh12_utils.
        if infile.endswith('.gz'):
            outfile = outfile + '.gz'

        if os.path.exists(outfile):
            sys.exit('Yowzer! Unlikely to happen, but ' + outfile +
                     ' already exists.  Aborting')

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    regex = re.compile(r'^\d')

    # determine if it's fasta or a fasta.qual file by peeking at line two
    f_in = mh12_utils.open_file_read(infile)
    try:
        f_in.readline()
        second_line = f_in.readline()
    finally:
        f_in.close()

    fasta = regex.match(second_line) is None

    f_in = mh12_utils.open_file_read(infile)
    f_out = mh12_utils.open_file_write(outfile)

    # try/finally so both handles are released if reading or writing fails.
    try:
        first = True

        for line in f_in:
            if line.startswith("\n"):
                continue
            elif line.startswith(">"):
                # Terminate the previous record's joined line, except before
                # the very first header.
                if not first:
                    f_out.write("\n")
                else:
                    first = False

                f_out.write(line)
            else:
                f_out.write(line.rstrip())

                if not fasta:
                    f_out.write(' ')

        f_out.write("\n")
    finally:
        f_in.close()
        f_out.close()

    # Only reached when the copy succeeded, so a failed run never replaces
    # the original file.
    if overwrite:
        os.rename(outfile, infile)
Esempio n. 6
0
def fasta2multiline(infile, outfile, line_length):
    """Rewrite a fasta file using each sequence's multi-line string form.

    Every sequence read from ``infile`` is written to ``outfile`` as the
    result of ``seq.multi_line_str(line_length)``.

    Args:
        infile: name of the fasta file to read.
        outfile: name of the file to write; must differ from ``infile``.
        line_length: passed unchanged to ``multi_line_str``.

    Exits (via ``sys.exit``) if ``infile`` and ``outfile`` are the same.
    """
    if infile == outfile:
        sys.exit('infile = outfile in fastn.fasta2multiline. Aborting')

    f_in = mh12_utils.open_file_read(infile)
    f_out = mh12_utils.open_file_write(outfile)

    # try/finally so both handles are released if parsing or writing fails.
    try:
        while True:
            seq = get_next_seq_from_file(f_in, 'fasta')

            if not seq:
                break

            print >> f_out, seq.multi_line_str(line_length)
    finally:
        f_in.close()
        f_out.close()
Esempio n. 7
0
def fastn2uniq(infile, outfile):
    """Copy ``infile`` to ``outfile``, keeping one sequence per id.

    Sequences are compared by ``seq.id`` only; the first sequence seen with
    a given id is written and later ones with the same id are dropped.

    Args:
        infile: name of the fasta/fastq file to read.
        outfile: name of the file to write.
    """
    # NOTE(review): unlike the other functions that call fasta_or_fastq,
    # an 'unknown' result is not checked here and is passed straight to
    # get_next_seq_from_file -- confirm that this is intended.
    filetype = fasta_or_fastq(infile)
    seen_ids = set()

    f_in = mh12_utils.open_file_read(infile)
    f_out = mh12_utils.open_file_write(outfile)

    # try/finally so both handles are released if parsing or writing fails.
    try:
        while True:
            seq = get_next_seq_from_file(f_in, filetype)

            if not seq:
                break

            if seq.id not in seen_ids:
                seen_ids.add(seq.id)
                print >> f_out, seq
    finally:
        f_in.close()
        f_out.close()
Esempio n. 8
0
    parser.print_help()
    sys.exit(1)

# The three positional arguments: input file, text output, plot output.
# NOTE(review): with optparse's default settings ``parser.rargs`` is emptied
# during parsing; confirm that the parser is configured so the positional
# arguments are still available here.
options.infile = parser.rargs[0]
options.txtout = parser.rargs[1]
options.plotout = parser.rargs[2]

filetype = fastn.fasta_or_fastq(options.infile)

if filetype == 'unknown':
    # Use options.infile: no bare ``infile`` name is assigned in this script
    # section, so the original message would have raised NameError.
    sys.exit('File ' + options.infile + ' not recognised as a fasta/q')

# One counter per integer 0..100, all starting at zero; indexed further
# down by floor(gc) (presumably a GC percentage -- confirm in fastn).
gc_hist = dict(zip(range(101), [0] * 101))

f_in = mh12_utils.open_file_read(options.infile)
f_out = mh12_utils.open_file_write(options.txtout)

while 1:
    seq = fastn.get_next_seq_from_file(f_in, filetype)

    if not seq:
        break

    if options.window:
        i = 0

        while i < len(seq):
            tmp = fastn.Fasta(seq.id, seq.seq[i:i + options.window])
            gc = tmp.gc()
            gc_hist[floor(gc)] += 1
            print >> f_out, seq.id, str(i + 1), gc