def sra2illumina(input_file, output_file, tag_read=None, tag='', phred_conversion=False,
                 operation='change', tmp_dir=None, size_read_buffer=10**8):
    """
    Convert a FASTQ file downloaded from the Short Read Archive (SRA) into an
    Illumina-style FASTQ file (Illumina read names, optionally converted
    PHRED qualities).

    The first line of input_file decides what happens.  If it starts with
    '@' and its first space-separated token does not end in '/1' or '/2',
    the file is treated as SRA-named.  The reads are rewritten when the file
    is SRA-named or when operation is 'change'; otherwise input_file is only
    linked or copied to output_file.

    Rewriting assumes four-line FASTQ records:
      * line 1 becomes '@' + tag_read + int2str(line_number, 12) + tag when
        tag_read is given, otherwise the first space-separated token of the
        original header followed by tag (line_number is the running line
        counter of the input, i.e. 1, 5, 9, ... for the headers);
      * line 3 becomes a bare '+';
      * lines 2 and 4 are copied untouched.

    Parameters:
      input_file       -- path of the FASTQ file to read.
      output_file      -- path of the FASTQ file (or link) to create.
      tag_read         -- optional prefix for newly generated read names.
      tag              -- suffix appended to every read name (e.g. '/1').
      phred_conversion -- '64' runs phred.fq2fq('sanger' -> 'illumina-1.5'),
                          '33' runs phred.fq2fq('auto-detect' -> 'sanger');
                          a false value writes the reads directly to
                          output_file.  NOTE(review): any other true value
                          leaves the reads in the temporary file and never
                          creates output_file -- confirm with callers.
      operation        -- 'change' always rewrites; 'soft', 'hard' and 'copy'
                          select how an already well-named file is passed on
                          (symlink, hard link with copy fallback, copy).
      tmp_dir          -- forwarded to give_me_temp_filename and phred.fq2fq.
      size_read_buffer -- size hint handed to readlines() for each chunk.

    Exits the process with status 1 when no rewrite is needed and operation
    is not one of 'soft', 'hard' or 'copy'.
    """
    # Rewritten reads go straight to output_file unless a quality conversion
    # follows, in which case they pass through a temporary file first.
    if phred_conversion:
        temp_file = give_me_temp_filename(tmp_dir)
    else:
        temp_file = output_file

    # Peek at the first read name; the handle is closed immediately.
    with open(input_file, 'r') as handle:
        read_name = handle.readline().rstrip('\r\n')
    first_token = read_name.partition(" ")[0]
    sra = read_name.startswith('@') and not first_token.endswith(('/1', '/2'))

    if operation == 'change' or sra:
        with open(input_file, 'r') as fid:
            with open(temp_file, 'w') as fod:
                position = 0  # position inside the current 4-line record (1..4)
                line_number = 0  # running count of all input lines
                while True:
                    # The collector is switched off while the chunk is read
                    # (presumably to avoid GC overhead while a very large
                    # list is built) and is always switched back on.
                    gc.disable()
                    try:
                        lines = fid.readlines(size_read_buffer)
                    finally:
                        gc.enable()
                    if not lines:
                        break
                    for j, line in enumerate(lines):
                        line_number += 1
                        position += 1
                        if position == 1:
                            if tag_read:
                                lines[j] = '@%s%s%s\n' % (tag_read, int2str(line_number, 12), tag)
                            else:
                                # without tag_read the original SRA id is kept;
                                # rstrip also copes with CRLF line endings
                                original_id = line.rstrip('\r\n').partition(" ")[0]
                                lines[j] = '%s%s\n' % (original_id, tag)
                        elif position == 3:
                            lines[j] = "+\n"
                        elif position == 4:
                            position = 0
                    fod.writelines(lines)

        if phred_conversion == '64':
            phred.fq2fq(temp_file, 'sanger', output_file, 'illumina-1.5', tmp_dir=tmp_dir)
            os.remove(temp_file)
        elif phred_conversion == '33':
            phred.fq2fq(temp_file, 'auto-detect', output_file, 'sanger', tmp_dir=tmp_dir)
            os.remove(temp_file)
    else:
        sys.stdout.write("No changes are done!\n")
        # islink() is checked as well so that a dangling symlink left at
        # output_file does not make the link creation below fail.
        if os.path.isfile(output_file) or os.path.islink(output_file):
            os.remove(output_file)

        if operation == 'soft':
            if os.path.islink(input_file):
                # point at the link's target instead of chaining symlinks
                os.symlink(os.readlink(input_file), output_file)
            else:
                os.symlink(input_file, output_file)
        elif operation == 'hard':
            linkto = input_file
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
            try:
                os.link(linkto, output_file)
            except OSError:
                # Hard links fail e.g. across partitions; fall back to a copy.
                sys.stderr.write("WARNING: Cannot do hard links ('%s' and '%s')!\n"
                                 % (linkto, output_file))
                shutil.copyfile(linkto, output_file)
        elif operation == 'copy':
            shutil.copyfile(input_file, output_file)
        else:
            sys.stderr.write("ERROR: unknown operation of linking! %s\n" % (operation,))
            sys.exit(1)
def sra2illumina(input_file, output_file, tag_read=None, tag='', phred_conversion=False,
                 operation='change', tmp_dir=None, size_read_buffer=10**8):
    """
    Convert a FASTQ file downloaded from the Short Read Archive (SRA) into an
    Illumina-style FASTQ file (Illumina read names, optionally converted
    PHRED qualities).

    The first line of input_file decides what happens.  If it starts with
    '@' and its first space-separated token does not end in '/1' or '/2',
    the file is treated as SRA-named.  The reads are rewritten when the file
    is SRA-named or when operation is 'change'; otherwise input_file is only
    linked or copied to output_file.

    Rewriting assumes four-line FASTQ records:
      * line 1 becomes '@' + tag_read + int2str(line_number, 12) + tag when
        tag_read is given, otherwise the first space-separated token of the
        original header followed by tag (line_number is the running line
        counter of the input, i.e. 1, 5, 9, ... for the headers);
      * line 3 becomes a bare '+';
      * lines 2 and 4 are copied untouched.

    Parameters:
      input_file       -- path of the FASTQ file to read.
      output_file      -- path of the FASTQ file (or link) to create.
      tag_read         -- optional prefix for newly generated read names.
      tag              -- suffix appended to every read name (e.g. '/1').
      phred_conversion -- '64' runs phred.fq2fq('sanger' -> 'illumina-1.5'),
                          '33' runs phred.fq2fq('auto-detect' -> 'sanger');
                          a false value writes the reads directly to
                          output_file.  NOTE(review): any other true value
                          leaves the reads in the temporary file and never
                          creates output_file -- confirm with callers.
      operation        -- 'change' always rewrites; 'soft', 'hard' and 'copy'
                          select how an already well-named file is passed on
                          (symlink, hard link with copy fallback, copy).
      tmp_dir          -- forwarded to give_me_temp_filename and phred.fq2fq.
      size_read_buffer -- size hint handed to readlines() for each chunk.

    Exits the process with status 1 when no rewrite is needed and operation
    is not one of 'soft', 'hard' or 'copy'.
    """
    # Rewritten reads go straight to output_file unless a quality conversion
    # follows, in which case they pass through a temporary file first.
    if phred_conversion:
        temp_file = give_me_temp_filename(tmp_dir)
    else:
        temp_file = output_file

    # Peek at the first read name; the handle is closed immediately.
    with open(input_file, 'r') as handle:
        read_name = handle.readline().rstrip('\r\n')
    first_token = read_name.partition(" ")[0]
    sra = read_name.startswith('@') and not first_token.endswith(('/1', '/2'))

    if operation == 'change' or sra:
        with open(input_file, 'r') as fid:
            with open(temp_file, 'w') as fod:
                position = 0  # position inside the current 4-line record (1..4)
                line_number = 0  # running count of all input lines
                while True:
                    lines = fid.readlines(size_read_buffer)
                    if not lines:
                        break
                    for j, line in enumerate(lines):
                        line_number += 1
                        position += 1
                        if position == 1:
                            if tag_read:
                                lines[j] = '@%s%s%s\n' % (tag_read, int2str(line_number, 12), tag)
                            else:
                                # without tag_read the original SRA id is kept;
                                # rstrip also copes with CRLF line endings
                                original_id = line.rstrip('\r\n').partition(" ")[0]
                                lines[j] = '%s%s\n' % (original_id, tag)
                        elif position == 3:
                            lines[j] = "+\n"
                        elif position == 4:
                            position = 0
                    fod.writelines(lines)

        if phred_conversion == '64':
            phred.fq2fq(temp_file, 'sanger', output_file, 'illumina-1.5', tmp_dir=tmp_dir)
            os.remove(temp_file)
        elif phred_conversion == '33':
            phred.fq2fq(temp_file, 'auto-detect', output_file, 'sanger', tmp_dir=tmp_dir)
            os.remove(temp_file)
    else:
        sys.stdout.write("No changes are done!\n")
        # islink() is checked as well so that a dangling symlink left at
        # output_file does not make the link creation below fail.
        if os.path.isfile(output_file) or os.path.islink(output_file):
            os.remove(output_file)

        if operation == 'soft':
            if os.path.islink(input_file):
                # point at the link's target instead of chaining symlinks
                os.symlink(os.readlink(input_file), output_file)
            else:
                os.symlink(input_file, output_file)
        elif operation == 'hard':
            linkto = input_file
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
            try:
                os.link(linkto, output_file)
            except OSError:
                # Hard links fail e.g. across partitions; fall back to a copy.
                sys.stderr.write("WARNING: Cannot do hard links ('%s' and '%s')!\n"
                                 % (linkto, output_file))
                shutil.copyfile(linkto, output_file)
        elif operation == 'copy':
            shutil.copyfile(input_file, output_file)
        else:
            sys.stderr.write("ERROR: unknown operation of linking! %s\n" % (operation,))
            sys.exit(1)