Beispiel #1
0
def spDiff( fileA, fileB ):
    """Return the symmetric difference of the 'U' sequences of two fasta files.

    Both files are loaded with ``Fasta.loadSequences``. In each set, the
    sequences selected by ``findPattern('U', mode='full')`` are kept, and the
    symmetric difference of the two selections (``method='raw'``) is returned.

    fileA, fileB -- paths of the fasta files to compare.
    """
    # Both files are opened before anything is loaded (as before), but the
    # context managers guarantee the handles are released if loading fails.
    with open( fileA, 'r' ) as sfileA:
        with open( fileB, 'r' ) as sfileB:
            seqsA = Fasta.loadSequences( sfileA )
            seqsB = Fasta.loadSequences( sfileB )

    spA = seqsA.findPattern('U', mode='full')
    spB = seqsB.findPattern('U', mode='full')

    # NOTE: 'symetric_difference' is the spelling used by the Fasta API here.
    return spA.symetric_difference(spB, method='raw')
Beispiel #2
0
def main():
    """Parse a blast XML report, fetch the hit sequences and filter them.

    Steps, driven by the command-line options:

    * create ``<output>.index.0`` and ``<output>.fasta.0`` if missing;
    * with ``-P``, parse the blast XML input and extract the requested data
      into the index file (with pattern filters when ``-f`` is given);
    * with ``-U``, run ``uniq`` on the index file;
    * collect the second ``|``-separated field of every index line and fetch
      those entries from the local database into the fasta file;
    * with ``-M``, add full headers, keep the sequences returned by
      ``getTopSeqs`` and save them to ``<output>.<exponent>.<count>.fasta``.

    Progress messages are written to stderr according to the verbosity level.
    A missing required option ends the program through ``parser.error``.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='blast output file, in xml format.',
                       metavar='FILE.xml' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base output filename',
                       metavar='FILE' )

    parser.add_option( '-d', '--db',
                       dest='database',
                       help='database from which the sequences should be fetched.',
                       metavar='FILE' )

    parser.add_option( '-e', '--evalue',
                       dest='evalue',
                       type='float',
                       help='e-value threshold.',
                       metavar='FLOAT' )

    parser.add_option( '-E', '--start_expo_evalue',
                       dest='startexpoeval',
                       type='int',
                       help='exponent of the evalue threshold used when refiltering.',
                       metavar='INT' )

    parser.add_option( '-b', '--blast_version',
                       dest='blastversion',
                       help='set the blast version to use, either `legacy` or `plus`.',
                       metavar='VERSION' )

    parser.add_option( '-f', '--filter',
                       action='store_true', dest='dofilter', default=False,
                       help='do the filter step.')

    parser.add_option( '-p', '--keep_patterns_iff',
                       dest='keeppatiff',
                       help='Keep only if patterns match exactly. The patterns should be coma seperated.',
                       metavar='keyword1:pat1,pat2,pat3,,keyword2:pat1,pat2' )

    parser.add_option( '-q', '--keep_patterns',
                       dest='keeppat',
                       help='Keep patterns that match exactly, no matter what. The patterns should be coma seperated.',
                       metavar='keyword1:pat1,pat2,pat3,,keyword2:pat1,pat2' )

    parser.add_option( '-g', '--gis',
                       dest='gis',
                       help='pickle file containing the gis that should match',
                       metavar='FILE')

    parser.add_option( '-F', '--format',
                       dest='formatop',
                       help='format of the output. default is `header,evalue`',
                       metavar='INTEGER' )

    # The help fragments now end with a space so the joined text reads
    # correctly (it used to render as "to beprocessed" / "orderedby").
    parser.add_option( '-M', '--max_num_start_seq',
                       dest='maxnumstartseq',
                       type='int',
                       help=('maximum number of sequences in the first alignement to be '
                             'processed. If set, a new input file with the top sequences ordered '
                             'by evalue is created and used.'),
                       metavar='INTEGER' )

    parser.add_option( '-k', '--keep_U',
                       action='store_true', dest='keepu', default=False,
                       help=('Should U containing sequences be kept regardless of their evalues ?. '
                             'Use in conjunction of -M'))

    parser.add_option( '-T', '--temp',
                       dest='temp',
                       help='set the temp folder to use.',
                       metavar='FOLDER' )

    parser.add_option( '-P', '--parse',
                       dest='parse', action='store_true', default=False,
                       help='do not do extra fancy steps. Just parse the file and return the disired output in a file.' )

    parser.add_option( '-U', '--uniq',
                       dest='uniq', action='store_true', default=False,
                       help='remove duplicates.' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = 1,
                         database = 'nr',
                         evalue = 10,
                         startexpoeval = -10,
                         keeppat = None,
                         blastversion = 'legacy',
                         temp = '/tmp/',
                         maxnumstartseq = None,
                         formatop = 'header,evalue')

    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a TypeError further down.
    if not options.outputfilename:
        parser.error('You must provide a base output filename (-o).')
    if options.parse and not options.inputfilename:
        parser.error('You must provide an input file (-i) when using -P.')

    verbosity = options.verbosity
    database = options.database
    evalue = options.evalue
    maxnumstartseq = options.maxnumstartseq

    def log(level, message):
        """Write `message` to stderr when the verbosity is at least `level`."""
        if verbosity >= level:
            sys.stderr.write(message)

    blastindexfile = options.outputfilename + '.index.0'
    blastfastafile = options.outputfilename + '.fasta.0'

    # Equivalent of `touch`, without building a shell command from the
    # user-supplied filename: create the file if needed, refresh its times.
    for path in (blastindexfile, blastfastafile):
        with open(path, 'a'):
            os.utime(path, None)

    if options.blastversion == 'legacy':
        fetcherclass = FastaCmdWrapper
    else:
        fetcherclass = BlastDbCmdWrapper
    fetcher = fetcherclass( entry=[],
                            db=database,
                            outfile=blastfastafile )

    ## Parse the blast output file.
    if options.parse:
        log(1, '\n>>> Parsing blast output : ' + options.inputfilename + '\n')
        with open(options.inputfilename, 'r') as infile:
            blastparser = PsiBlastXMLParser(infile)
            blastparser.parse()
            log(2, '    >>> Extracting required data.\n')
            # The return value is not needed: the data is written to outfile.
            if options.dofilter:
                blastparser.extractData( evalue=evalue,
                                         fmt=options.formatop,
                                         outfile=blastindexfile,
                                         includepatternsiff=fmtOptPat(options.keeppatiff),
                                         includepatterns=fmtOptPat(options.keeppat),
                                         excludepatterns=({'title':['hypothetical', 'predicted', 'PREDICTED']}))
            else:
                blastparser.extractData( evalue=evalue,
                                         fmt=options.formatop,
                                         outfile=blastindexfile )

    ## Only keep one copy of a header, the one with the best evalue.
    if options.uniq:
        log(1, '\n>>> Keeping only best evalues.\n')
        uniq(blastindexfile)

    ## Gather all GIs in list
    log(2, '\n>>> Gathering all Gis.\n')
    entries = []
    with open(blastindexfile, 'r') as bif:
        for line in bif:
            # Blank lines carry no entry and would raise IndexError below.
            if not line.strip():
                continue
            entries.append(line.split('|')[1])
    fetcher.entry = entries

    ## Fetch the sequences from the local databases.
    ## TODO : Fetch failed from the web.
    log(1, '\n>>> Building fasta.0 file by fetching sequences from local database.\n')
    fetcher.run()

    ## Apply final filters : keep only top evalues and U containing until a threshold is reached
    if maxnumstartseq:
        log(1, '\n>>> Applying final filters on ' + blastfastafile + '.\n')
        log(2, '    >>> Adding evalue to headers.\n')
        ### TODO : use .fasta.fh in tmp dir.
        tmpfullheadfasta = blastfastafile + '.fh'
        addheaders = AddFullHeadersWrapper2(blastfastafile,
                                            tmpfullheadfasta,
                                            blastindexfile)
        addheaders.run()
        log(3, '        >>> Loading sequences.\n')
        with open(tmpfullheadfasta, 'r') as ff:
            allseqs = Fasta.loadSequences(ff)
        log(2, '    >>> Keeping valid sequences.\n')
        tmppat = None
        if options.keepu:
            tmppat = 'U'
        # validseqs[0] holds the kept sequences, validseqs[1] the evalue
        # exponent reported in the message and the file name below.
        validseqs = getTopSeqs(seqs=allseqs,
                               maxnumseqs=maxnumstartseq,
                               startevalue=options.startexpoeval,
                               pattern=tmppat,
                               verbose=verbosity>=4 )
        keptseqs = '.'.join(( options.outputfilename,
                              str(validseqs[1]),
                              str(len(validseqs[0])),
                              'fasta' ))
        log(2, '    >>> Found ' + str(len(validseqs[0])) +
               ' sequences with evalue <= 1e' +
               str(validseqs[1]) + '\n')
        with open(keptseqs, 'w') as ff:
            validseqs[0].save(ff)

    sys.stderr.write( '\n' )
Beispiel #3
0
def main():
    """Extract the selenoproteins found in one or more fasta files.

    ``-i`` takes a comma separated list of fasta files. Each file is loaded
    with ``FastaLib.loadSequences``, passed to ``findSelenoproteins`` and the
    result is saved to the ``-o`` file, or to stdout when ``-o`` is absent.
    Progress messages are printed on stdout according to the verbosity level.

    The program exits with an error message when no input file is given.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='fasta file in which selenoproteins should be looked for.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='fasta file containing the selenoproteins',
                       metavar='FILE' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = '1' )

    (options, args) = parser.parse_args()

    verbosity = int( options.verbosity )

    if not options.inputfilename:
        sys.exit( 'You must provide an input filename.')

    # The found records are echoed on stdout only when the fasta output goes
    # to a file; otherwise they would appear twice on stdout.
    stdoutflag = bool( options.outputfilename )

    infiles = []
    outfile = sys.stdout
    try:
        # All inputs are opened up front so a bad path fails before any work.
        for name in options.inputfilename.split(','):
            infiles.append( open( name, 'r' ) )

        if stdoutflag:
            outfile = open( options.outputfilename, 'w' )

        for f in infiles:
            if verbosity >= 1:
                print('')
                print('>>> Searching for selenoproteins in file ' + f.name)
                print('')

            if verbosity >= 2:
                print('>>> Loading sequences ...')

            sequences = FastaLib.loadSequences( f )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')
                print('>>> Searching for U containing sequences ...')

            selenoproteins = findSelenoproteins( sequences )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')

            FastaLib.saveSequences(selenoproteins, outfile)

            if verbosity >= 3 and stdoutflag:
                for selP in selenoproteins:
                    print(selP.header.strip())
                    print(selP.sequence.strip())

            if verbosity >= 1:
                print('')
                print('Found ' + str( len( selenoproteins ) ) + ' selenoproteins')
                print('')
    finally:
        # Release every handle even when a step above raised.
        for f in infiles:
            f.close()
        if outfile is sys.stdout:
            # stdout belongs to the interpreter: flush it, do not close it.
            outfile.flush()
        else:
            outfile.close()
Beispiel #4
0
def main():
    """Run the alignment preparation pipeline on a fasta file.

    The input sequences are first written, gaps removed, to a temporary file.
    The enabled steps are then chained, each one reading the output of the
    previous enabled step:

    filter (-F) -> mafft (-M) -> trimal (-T, only when the mafft output holds
    more than 200 sequences) -> gap removal (when both -C and -T are set)
    -> t_coffee (-C) -> add headers (-B) -> prepare for selenoprofiles (-P).

    ``-A`` enables the headers, filter, mafft, trimal and t_coffee steps.
    With ``-Y`` the command lines are printed instead of being executed.
    The temporary file is removed at the end unless ``-D`` is given.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='file containing the alignments that will be used to build the PSSM using prepare_alignment_selenoprofiles.py.',
                       metavar='FILE' )

    parser.add_option( '-r', '--datadir',
                       dest='datadir',
                       help='directory containing, for each familly FAM, a directory FAM.blast and a directory FAM.selenoprofiles.prep',
                       metavar='DIR' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base name used for outputs',
                       metavar='NAME' )

    parser.add_option( '-a', '--n_core',
                       dest='ncore',
                       type='int',
                       help='number of cores to use during the various operations.',
                       metavar='INTEGER' )

    parser.add_option( '-M', '--mafft',
                       action='store_true', dest='domafft', default=False,
                       help='do the mafft step.')

    parser.add_option( '-T', '--trimal',
                       action='store_true', dest='dotrimal', default=False,
                       help='do the trimal step.')

    parser.add_option( '-C', '--tcoffee',
                       action='store_true', dest='dotcoffee', default=False,
                       help='do the t_coffee step.')

    parser.add_option( '-B', '--headers',
                       action='store_true', dest='doheaders', default=False,
                       help='do the addheaders step.')

    parser.add_option( '-p', '--patternfile',
                       dest='patternfile',
                       help='pattern file to use if the -D option is used.',
                       metavar='FILE' )

    parser.add_option( '-F', '--filter',
                       action='store_true', dest='dofilter', default=False,
                       help='do the filter step.')

    parser.add_option( '-P', '--prepare',
                       action='store_true', dest='doprepal', default=False,
                       help='do the prepare_alignment_selenoprofiles step.')

    parser.add_option( '-g', '--tag_threshold',
                       dest='tagthreshold',
                       type='float',
                       help='tag threshold to use if the -P or --prepare is used.',
                       metavar='FLOAT' )

    parser.add_option( '-A', '--all',
                       action='store_true', dest='doall', default=False,
                       help='do all steps.')

    parser.add_option( '-Y', '--dry',
                       action='store_true', dest='dryrun', default=False,
                       help="Prints the commands without executing them.")

    parser.add_option( '-D', '--debug',
                       action='store_true', dest='debug', default=False,
                       help="Debug mode. Nothing is cleaned.")

    parser.add_option( '-t', '--temp',
                       dest='temp',
                       help='set the temp folder to use.',
                       metavar='FOLDER' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = 1,
                         ncore = 1,
                         tagthreshold = 0.5,
                         temp = '/tmp/',
                         patternfile = 'None' )

    (options, args) = parser.parse_args()

    # Report missing mandatory options as usage errors, before any file is
    # created, instead of failing later with a TypeError.
    if not options.inputfilename:
        parser.error('You must provide an input file (-i).')
    if not options.outputfilename:
        parser.error('You must provide a base output name (-o).')

    if options.doall:
        options.doheaders = True
        options.dofilter = True
        options.domafft = True
        options.dotrimal = True
        options.dotcoffee = True

    infile = options.inputfilename
    ncore = options.ncore
    verbosity = options.verbosity
    temp = options.temp
    patternfile = options.patternfile

    mafftoutfile = options.outputfilename + '_mafft.fasta'
    trimaloutfile1 = options.outputfilename + '_trimmed_native.fasta'
    trimaloutfile2 = options.outputfilename + '_trimmed_spadded.fasta'
    tcoffeeoutfile = options.outputfilename + '_tcoffee.fasta'
    fullheadoutfile = options.outputfilename + '.det.fasta'
    filteroutfile = options.outputfilename + '.filt.fasta'

    def runstep(wrapper, stepinput, message):
        """Point `wrapper` at `stepinput`, then run it or, in dry run, print its command line."""
        time.sleep(0.5)
        wrapper.infile = stepinput
        if options.dryrun:
            print(wrapper.cline)
        else:
            if verbosity >= 1:
                sys.stderr.write(message)
            wrapper.run()

    tmpinitfilename = genTempfilename(options.temp, 'ungapped_')

    # Everything that can fail after the temporary name is chosen runs inside
    # the try block, so the cleanup in `finally` always gets a chance to run.
    try:
        with open(infile, 'r') as iff:
            tmpseqs = Fasta.loadSequences(iff)
        with open(tmpinitfilename, 'w') as ugf:
            for seq in tmpseqs:
                removeGaps(seq).prints(ugf)
        # tmpinfile always names the output of the last step that was enabled.
        tmpinfile = tmpinitfilename

        addheaders = UtilityWrappers.AddFullHeadersWrapper2(tmpinfile,
                                                            fullheadoutfile,
                                                            patternfile)

        filterseqs = UtilityWrappers.FilterWrapper(tmpinfile,
                                                   filteroutfile,
                                                   inverse=True,
                                                   titlematch=('PREDICTED', 'predicted', 'hypothetical'))

        mafft = UtilityWrappers.MafftWrapper(tmpinfile,
                                             mafftoutfile,
                                             auto=True)

        trimal = UtilityWrappers.TrimalWrapper(tmpinfile,
                                               trimaloutfile1,
                                               clusters=100)

        tcoffee = UtilityWrappers.TcoffeeWrapper(trimaloutfile2,
                                                 tcoffeeoutfile,
                                                 ncore=ncore)

        prepsp = UtilityWrappers.SelenoprofilesPreWrapper(tmpinfile,
                                                          options.outputfilename,
                                                          all=True,
                                                          tagthreshold=options.tagthreshold,
                                                          temp=temp)

        if options.dryrun:
            print('\nThis is a dry run. Relaunch the command without the option -Y to do the actual stuff.\n')

        ## Filter out the 'fake' proteins
        if options.dofilter:
            runstep(filterseqs, tmpinfile, '\n    >>> Filtering out\n\n')
            tmpinfile = filteroutfile

        ## run mafft
        numseqinmafftoutput = 0
        if options.domafft:
            runstep(mafft, tmpinfile, '\n    >>> Running Mafft\n\n')
            tmpinfile = mafftoutfile
            if not options.dryrun:
                with open(mafftoutfile, 'r') as mfo:
                    numseqinmafftoutput = len(Fasta.loadSequences(mfo))

        ## run trimal (never in a dry run: the sequence count stays at 0)
        trimmed = options.dotrimal and numseqinmafftoutput > 200
        if trimmed:
            runstep(trimal, tmpinfile, '\n    >>> Running Trimal\n\n')
            tmpinfile = trimaloutfile1

        if not options.dryrun and options.dotcoffee and options.dotrimal:
            if verbosity >= 1:
                sys.stderr.write('\n    >>> Removing gaps\n\n')

            with open(tmpinfile, 'r') as ti:
                si = Fasta.loadSequences(ti)
            tmpinfile = trimaloutfile2

            ## saves the sequences with no gaps
            refs = Fasta.SequenceList()
            for s in si:
                refs.append(removeGaps(s))

            with open(tmpinfile, 'w') as to:
                Fasta.saveSequences(refs, to)

                if trimmed:
                    if verbosity >= 1:
                        sys.stderr.write('\n    >>> Adding ommited selenoproteins\n')
                    ## Gather the non intersecting proteins from the 2 files
                    diffSelenoproteins = spDiff( mafftoutfile,
                                                 trimaloutfile1 )

                    ## remove gaps from selenoproteins
                    spDiffr = Fasta.SequenceList()
                    for s in diffSelenoproteins:
                        spDiffr.append(removeGaps(s))
                    ## append to the file the selenoproteins that were not present
                    Fasta.saveSequences(spDiffr, to)

        ## run t_coffee
        if options.dotcoffee:
            runstep(tcoffee, tmpinfile, '\n    >>> Running T_coffee\n\n')
            tmpinfile = tcoffeeoutfile

        ## Add full headers
        if options.doheaders:
            runstep(addheaders, tmpinfile, '\n    >>> Adding headers\n\n')
            tmpinfile = fullheadoutfile

        ## prepare alignments for selenoprofiles
        if options.doprepal:
            runstep(prepsp, tmpinfile, '\n    >>> preparing for selenoprofiles\n\n')

    except KeyboardInterrupt:
        sys.exit('manual exit.')
    finally:
        # The file may not exist if loading the input failed before it was written.
        if not options.debug and os.path.exists(tmpinitfilename):
            if verbosity >= 2:
                sys.stderr.write('\n    >>> Removing temporary file ' + tmpinitfilename +'\n\n')
            os.remove(tmpinitfilename)
Beispiel #5
0
def main():
    """Replace the headers of a fasta file with the complete ones of a pattern file.

    The header lines (starting with ``>``) of the pattern file are used as
    replacement headers for the sequences of the input file:

    * method ``gi`` (default): each input sequence gets the first pattern
      header carrying the same ``gi|<digits>|`` identifier; the headers of
      sequences without a match are reported on stderr;
    * method ``inplace``: headers are substituted in file order, both files
      must hold the same number of entries.

    The result is written to the ``-o`` file, or to stdout when it is absent.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='file with incomplete headers.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='outputfile.',
                       metavar='FILE' )

    parser.add_option( '-p', '--pattern',
                       dest='patternfilename',
                       help='pattern file containing the complete headers.',
                       metavar='FILE' )

    # Fragments end with a space so that the joined help text reads correctly.
    parser.add_option( '-m', '--method',
                       dest='method',
                       help=('Method to use when filling the headers. '
                             'gi means match will be done by gi. inplace means that '
                             'header substitution is made by following the order.'),
                       metavar='{gi}|inplace' )

    parser.set_defaults( outputfilename = None,
                         method = 'gi')

    (options, args) = parser.parse_args()

    if not (options.inputfilename and options.patternfilename):
        parser.error('You have to provide two files, check help.')
    # Checked before the output file is opened, so that an invalid method
    # does not truncate an existing output file.
    if options.method not in ('gi', 'inplace'):
        parser.error('Wrong method')

    with open(options.inputfilename, 'r') as iff:
        inlines = Fasta.loadSequences(iff)
    with open(options.patternfilename, 'r') as pff:
        patlines = [line for line in pff if line.startswith('>')]

    if not options.outputfilename:
        outfile = sys.stdout
    else:
        outfile = open(options.outputfilename, 'w')

    try:
        if options.method == 'gi':
            GI_REGEX = re.compile(r'gi\|(\d+)\|')

            for iseq in inlines:
                nofound = True
                for phead in patlines:
                    try:
                        giq = GI_REGEX.search(iseq.header).group(1)
                        gis = GI_REGEX.search(phead).group(1)
                        if giq == gis:
                            tmpseq = Fasta.Sequence(phead, iseq.sequence)
                            tmpseq.prints(outfile)
                            nofound = False
                            break
                    except AttributeError:
                        # One of the two headers carries no gi identifier.
                        sys.stderr.write(iseq.header + ' ' + phead)
                        sys.exit(-1)
                    except IndexError:
                        # This handler used to reference undefined names
                        # (`pline`, `line`) and would itself raise NameError.
                        sys.stderr.write( '\nError while processing the files:\n' )
                        sys.stderr.write( phead + '\n' )
                        sys.stderr.write( iseq.header + '\n' )
                        break
                if nofound:
                    sys.stderr.write('\n' + iseq.header + '\n')
        else:
            if len(inlines) != len(patlines):
                raise Exception('Different number of sequences')
            for seq, pat in zip(inlines, patlines):
                Fasta.Sequence(pat, seq.sequence).prints(outfile, 60)
    finally:
        # stdout belongs to the interpreter: flush it, only close real files.
        if outfile is sys.stdout:
            outfile.flush()
        else:
            outfile.close()
Beispiel #6
0
def main():
    """Interactively sort the sequences of a fasta file into kept and thrown.

    Sequences whose header contains one of the ``-f`` patterns are kept and
    those containing one of the ``-F`` patterns are thrown without asking.
    Every other sequence is shown to the user, who answers with one key:

    * ``y`` or Enter : keep the sequence;
    * ``n`` : throw the sequence;
    * ``b`` : go back to the previous sequence and undo its decision;
    * ``s`` : display the sequence with the external ``fetch_seq.g`` tool;
    * ``d`` : show U/C/- position details (needs the ``-a`` alignment);
    * ``q`` : quit, optionally saving the decisions taken so far.

    The kept sequences are written to the ``-o`` file.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='fasta file in which selenoproteins should be looked for.',
                       metavar='FILE' )

    parser.add_option( '-a', '--alignmentfile',
                       dest='alfilename',
                       help='alignment file used when details are requested.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base output filename',
                       metavar='FILE' )

    parser.add_option( '-f', '--keep_prefilter',
                       dest='keepprefilter',
                       help='prefilters all sequences that have the given pattern in their name and keep them.',
                       metavar='PATTERN' )

    parser.add_option( '-F', '--throw_prefilter',
                       dest='throwprefilter',
                       help='prefilters all sequences that have the given pattern in their name and throw them.',
                       metavar='PATTERN' )

    parser.add_option( '-b', '--autothrow_abscents',
                       action='store_true', dest='atabscent', default=False,
                       help='Throw all sequences not present in the alignment provided.')

    parser.set_defaults( keepprefilter = False,
                         throwprefilter = False,
                         alfilename = False )

    (options, args) = parser.parse_args()

    # Checked up front: a missing output name used to crash only at the very
    # end, after the whole manual session had been done.
    if not options.inputfilename:
        parser.error('You must provide an input file (-i).')
    if not options.outputfilename:
        parser.error('You must provide an output file (-o).')

    with open(options.inputfilename, 'r') as inf:
        sequences = Fasta.loadSequences(inf)

    alignment = None
    rdetail = None
    if options.alfilename:
        with open(options.alfilename, 'r') as alf:
            alignment = Fasta.Alignment(Fasta.loadSequences(alf))
        # NOTE(review): nrdetail is never read below; the call is kept in case
        # findPositions has side effects -- confirm and remove.
        nrdetail = alignment.findPositions(('U','C','-'), False)
        rdetail = alignment.findPositions(('U','C','-'), True)

    def countat(symbol, position):
        """Number of entries recorded in rdetail for `symbol` at `position`, 0 when absent."""
        try:
            return len(rdetail[symbol][(position,)])
        except KeyError:
            return 0

    kpatterns = options.keepprefilter.split(',') if options.keepprefilter else []
    tpatterns = options.throwprefilter.split(',') if options.throwprefilter else []

    kept_seq = Fasta.SequenceList()
    thrown_seq = Fasta.SequenceList()
    man_check_list = Fasta.SequenceList()

    for seq in sequences:
        # any() stores a sequence once, however many patterns its header matches.
        kept = any(pattern in seq.header for pattern in kpatterns)
        thrown = any(pattern in seq.header for pattern in tpatterns)
        if kept:
            kept_seq.append(seq)
        if thrown:
            thrown_seq.append(seq)
        if not kept and not thrown:
            man_check_list.append(seq)

    idx = 0
    savequit = False
    while idx < len(man_check_list) and not savequit:
        seq = man_check_list[idx]
        gi = seq.header.split('|')[1]
        decided = False
        print(seq.header)
        while not decided:
            print(str(len(kept_seq)) + ' ' + str(len(thrown_seq)))
            choice = getch('# '+str(idx+1)+' / '+str(len(man_check_list))+' -- Keep ? [Y/n]')
            if choice == 'b':
                if idx > 0:
                    idx -= 1
                    seq = man_check_list[idx]
                    gi = seq.header.split('|')[1]
                    print(seq.header)
                    # Undo the earlier decision; the sequence is in at most
                    # one of the two lists.
                    # NOTE(review): assumes SequenceList.remove raises
                    # ValueError like list.remove -- confirm.
                    for decidedlist in (thrown_seq, kept_seq):
                        try:
                            decidedlist.remove(seq)
                        except ValueError:
                            pass
            elif choice in ('y', '\n'):
                kept_seq.append(seq)
                decided = True
                idx += 1
            elif choice == 'n':
                thrown_seq.append(seq)
                decided = True
                idx += 1
            elif choice == 's':
                # NOTE(review): the command goes through the shell and embeds
                # text taken from the fasta header and the -i option.
                os.system('fetch_seq.g -v TITLE="'+gi+'" -v ALL=1 '+options.inputfilename )
            elif choice == 'd' and options.alfilename:
                print('')
                print('General Detail :')
                for pos in rdetail['U']:
                    sys.stdout.write('    '+str(pos) + ' ')
                    for xpos in rdetail:
                        try:
                            sys.stdout.write(str(xpos)+': ')
                            sys.stdout.write(str(len(rdetail[xpos][pos])) + ' ; ')
                        except KeyError:
                            sys.stdout.write('0 ; ')
                    sys.stdout.write('\n')
                print('')
                # The last alignment entry with the same header wins.
                tmpseq = None
                for seqal in alignment:
                    if seqal.header == seq.header:
                        tmpseq = seqal
                if tmpseq:
                    tmppos = [i for i, x in enumerate(tmpseq.sequence) if x == 'U']
                    print('In the sequence provided :')
                    print('    U : ' + str(tmppos))
                    # countat reports 0 for a missing position, as the general
                    # detail above does, instead of raising KeyError.
                    print('    U in those positions : ' + str([countat('U', p) for p in tmppos]))
                    print('    C in those positions: ' + str([countat('C', p) for p in tmppos]))
                    print('    - in those positions: ' + str([countat('-', p) for p in tmppos]))
                    print('')
                    print('    Symbols present at the positions of each U :')
                    for pos in [p for p in rdetail['U'] if p != ()]:
                        spos = str(pos[0])
                        print('        Position : ' + spos + ' --- ' + str(tmpseq.sequence[int(spos)]))
                else:
                    print('Not present in the alignment provided')
                print('')
            elif choice == 'q':
                cc = 'r'
                while cc not in ('y', 'n', ''):
                    cc = raw_input('Manual quit. Would you like to save your changes ? [y/N]')
                if cc == 'y':
                    # Leave both loops and fall through to the save below
                    # (this answer used to return silently to the prompt).
                    savequit = True
                    decided = True
                else:
                    # 'n' or plain Enter: N is the default answer of the prompt.
                    sys.exit('Quiting without saving.')
            else:
                print('Wrong command')

    with open(options.outputfilename, 'w') as of:
        kept_seq.prints(of, 80)