Beispiel #1
0
def sra2illumina(input_file,
                 output_file,
                 tag_read=None,
                 tag='',
                 phred_conversion=False,
                 operation='change',
                 tmp_dir=None,
                 size_read_buffer=10**8):
    """
    It converts the FASTQ file (PHRED-33 qualities and SRA read names) downloaded
    from Short Read Archive (SRA) to Illumina FASTQ file (PHRED-64 Illumina v1.5
    and Illumina read names).
    """
    temp_file = None
    if phred_conversion:
        temp_file = give_me_temp_filename(tmp_dir)
    else:
        temp_file = output_file

    read_name = file(input_file, 'r').readline().rstrip('\r\n')
    sra = False
    e = read_name.partition(" ")[0]
    if read_name.startswith('@') and (not (e.endswith('/1')
                                           or e.endswith('/2'))):
        sra = True

    if operation == 'change' or sra:
        fid = open(input_file, 'r')
        fod = open(temp_file, 'w')
        i = 0
        r = 0
        while True:
            gc.disable()
            lines = fid.readlines(size_read_buffer)
            gc.enable()
            if not lines:
                break
            n = len(lines)
            for j in xrange(n):
                r = r + 1
                i = i + 1
                if i == 1:
                    if tag_read:
                        lines[j] = '@%s%s%s\n' % (tag_read, int2str(r,
                                                                    12), tag)
                    else:  # if there is no tag_read then the original SRA id is left
                        lines[j] = '%s%s\n' % (lines[j][:-1].partition(" ")[0],
                                               tag)
                    #lines[j] = lines[j].rstrip('\r\n').upper().split(' ')[1]+tag+'\n'
                elif i == 3:
                    lines[j] = "+\n"
                elif i == 4:
                    i = 0
            fod.writelines(lines)
        fid.close()
        fod.close()
        if phred_conversion == '64':
            phred.fq2fq(temp_file,
                        'sanger',
                        output_file,
                        'illumina-1.5',
                        tmp_dir=tmp_dir)
            os.remove(temp_file)
        elif phred_conversion == '33':
            phred.fq2fq(temp_file,
                        'auto-detect',
                        output_file,
                        'sanger',
                        tmp_dir=tmp_dir)
            os.remove(temp_file)
    else:
        print "No changes are done!"
        if os.path.isfile(output_file):
            os.remove(output_file)
        if operation == 'soft':
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
                os.symlink(linkto, ooutput_file)
            else:
                os.symlink(input_file, output_file)
        elif operation == 'hard':
            linkto = input_file
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
            try:
                os.link(linkto, output_file)
            except OSError as er:
                print >> sys.stderr, "WARNING: Cannot do hard links ('%s' and '%s')!" % (
                    linkto, output_file)
                shutil.copyfile(linkto, output_file)
#                if er.errno == errno.EXDEV:
#                    # they are on different partitions
#                    # [Errno 18] Invalid cross-device link
#                    shutil.copyfile(linkto,output_file)
#                else:
#                    print >>sys.stderr,"ERROR: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file)
#                    print >>sys.stderr,er
#                    sys.exit(1)

        elif operation == 'copy':
            shutil.copyfile(input_file, output_file)
        else:
            print >> sys.stderr, "ERROR: unknown operation of linking!", operation
            sys.exit(1)
def sra2illumina(input_file,
                 output_file,
                 tag_read = None,
                 tag='',
                 phred_conversion = False,
                 operation = 'change',
                 tmp_dir = None,
                 size_read_buffer = 10**8):
    """
    It converts the FASTQ file (PHRED-33 qualities and SRA read names) downloaded
    from Short Read Archive (SRA) to Illumina FASTQ file (PHRED-64 Illumina v1.5
    and Illumina read names).
    """
    temp_file = None
    if phred_conversion:
        temp_file = give_me_temp_filename(tmp_dir)
    else:
        temp_file = output_file

    read_name = file(input_file,'r').readline().rstrip('\r\n')
    sra = False
    e = read_name.partition(" ")[0]
    if read_name.startswith('@') and ( not(e.endswith('/1') or e.endswith('/2'))):
        sra = True

    if operation == 'change' or sra:
        fid = open(input_file,'r')
        fod = open(temp_file,'w')
        i = 0
        r = 0
        while True:
            lines = fid.readlines(size_read_buffer)
            if not lines:
                break
            n = len(lines)
            for j in xrange(n):
                r = r + 1
                i = i + 1
                if i == 1:
                    if tag_read:
                        lines[j] = '@%s%s%s\n' % (tag_read ,int2str(r,12) , tag)
                    else: # if there is no tag_read then the original SRA id is left
                        lines[j] = '%s%s\n' % (lines[j][:-1].partition(" ")[0], tag)
                    #lines[j] = lines[j].rstrip('\r\n').upper().split(' ')[1]+tag+'\n'
                elif i == 3:
                    lines[j] = "+\n"
                elif i == 4:
                    i = 0
            fod.writelines(lines)
        fid.close()
        fod.close()
        if phred_conversion == '64':
            phred.fq2fq(temp_file,'sanger',output_file,'illumina-1.5',tmp_dir = tmp_dir)
            os.remove(temp_file)
        elif phred_conversion == '33':
            phred.fq2fq(temp_file,'auto-detect',output_file,'sanger',tmp_dir = tmp_dir)
            os.remove(temp_file)
    else:
        print "No changes are done!"
        if os.path.isfile(output_file):
            os.remove(output_file)
        if operation == 'soft':
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
                os.symlink(linkto,ooutput_file)
            else:
                os.symlink(input_file,output_file)
        elif operation == 'hard':
            linkto = input_file
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
            try:
                os.link(linkto,output_file)
            except OSError as er:
                print >>sys.stderr,"WARNING: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file)
                shutil.copyfile(linkto,output_file)
#                if er.errno == errno.EXDEV:
#                    # they are on different partitions
#                    # [Errno 18] Invalid cross-device link
#                    shutil.copyfile(linkto,output_file)
#                else:
#                    print >>sys.stderr,"ERROR: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file)
#                    print >>sys.stderr,er
#                    sys.exit(1)

        elif operation == 'copy':
            shutil.copyfile(input_file, output_file)
        else:
            print >>sys.stderr, "ERROR: unknown operation of linking!", operation
            sys.exit(1)