def main():
    """Split a FASTA/QUAL pair into one FASTQ file per sample.

    Command-line options are parsed from ``script_info``.  The FASTA and
    QUAL files are walked in lockstep; every record whose FASTA header equals
    the QUAL header at the same position is appended to
    ``<output_dir>/seqs_<SampleID>.fastq``.  The SampleID is the first
    whitespace-delimited token of the header with its last ``_``-separated
    field removed.  The quality line is written exactly as it was parsed from
    the QUAL file.

    Headers that do not match the QUAL header at the same position are
    printed to stdout and the record is skipped.

    Raises:
        StopIteration: if the QUAL file holds fewer records than the FASTA
            file.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # get cmd-line options
    fasta_fp = opts.input_fasta_fp
    qual_fp = opts.input_qual_fp
    output_dir = opts.output_dir

    # create output dir
    create_dir(output_dir)

    # one open output handle per sample, keyed by output file basename
    output_fps = {}

    try:
        # nested ``with`` blocks (rather than a single multi-item ``with``)
        # keep this valid on Python 2.6 while still guaranteeing that both
        # input handles are closed
        with open(fasta_fp, 'U') as fasta_f:
            with open(qual_fp, 'U') as qual_f:
                sequences = MinimalFastaParser(fasta_f)
                qual_sequences = MinimalFastaParser(qual_f)

                for seq_name, seq in sequences:
                    # advance the qual parser in lockstep with the seqs
                    qual_seq_name, qual_seq = next(qual_sequences)

                    # verify headers from seq and qual match
                    if seq_name != qual_seq_name:
                        print(seq_name)
                        continue

                    # get the SampleID
                    samp_id = '_'.join(seq_name.split()[0].split('_')[:-1])
                    samp_filename = 'seqs_%s' % samp_id

                    # lazily open the output file the first time a sample
                    # is encountered
                    if samp_filename not in output_fps:
                        output_fps[samp_filename] = open(
                            join(output_dir, '%s.fastq' % samp_filename), 'w')

                    # write out the fastq format for seqs
                    output_fps[samp_filename].write(
                        '@%s\n%s\n+\n%s\n' % (seq_name, seq, qual_seq))
    finally:
        # close the per-sample files even if parsing or writing failed
        for output_f in output_fps.values():
            output_f.close()
def _open_split_lib_qual(split_lib_dir):
    """Open the quality file found in a ``split_libraries`` directory.

    ``seqs.qual`` is tried first; if it cannot be opened,
    ``seqs_filtered.qual`` is opened instead.  An ``IOError`` from the second
    attempt is propagated to the caller.
    """
    try:
        # for illumina
        return open(join(split_lib_dir, 'seqs.qual'), 'U')
    except IOError:
        # for 454
        return open(join(split_lib_dir, 'seqs_filtered.qual'), 'U')


def _append_split_lib_run(split_lib_dir, output_seqs, output_fna, samples,
                          seq_counter):
    """Append one split-libraries run to the combined FASTQ/FASTA outputs.

    ``samples`` is updated in place with per-sample counts of the records
    written.  Returns the sequence counter advanced by the number of records
    written.
    """
    with open(join(split_lib_dir, 'seqs.fna'), 'U') as seqs_f:
        with _open_split_lib_qual(split_lib_dir) as qual_f:
            qual_sequences = MinimalFastaParser(qual_f)

            for seq_name, seq in MinimalFastaParser(seqs_f):
                # advance the qual parser in lockstep with the seqs
                qual_seq_name, qual_seq = next(qual_sequences)

                # headers that disagree are reported and the record skipped
                if seq_name != qual_seq_name:
                    print(seq_name)
                    continue

                full_seq_name_list = seq_name.split()
                seq_name_prefix = '_'.join(
                    full_seq_name_list[0].split('_')[:-1])

                # get per sample sequence counts
                samples[seq_name_prefix] = samples.get(seq_name_prefix, 0) + 1

                # update sequence numbers since they may cause issues across
                # multiple split-lib runs, but retain barcode info (every
                # header field after the first)
                updated_seq_name = '%s_%s %s' % (
                    seq_name_prefix, seq_counter,
                    ' '.join(full_seq_name_list[1:]))

                # write the sequence out in FASTQ format
                output_seqs.write('@%s\n%s\n+\n%s\n' %
                                  (updated_seq_name, seq, qual_seq))
                # write the sequence out in FASTA format
                output_fna.write('>%s\n%s\n' % (updated_seq_name, seq))

                seq_counter += 1

    return seq_counter


def generate_full_split_lib_fastq(study, study_input_dir, zip_fname,
                                 files_to_remove,output_dir):
    """Generate the full split-library FASTQ and FASTA files for a study.

    Every entry of ``study_input_dir`` whose name starts with ``processed``
    is expected to contain ``split_libraries/seqs.fna`` together with
    ``split_libraries/seqs.qual`` (or ``seqs_filtered.qual`` when the former
    cannot be opened).  Records whose FASTA and QUAL headers match are
    renumbered with a single counter that runs across all folders and are
    written to two gzip files in ``output_dir``:

    * ``study_<study>_split_library_seqs.fastq.gz``
    * ``study_<study>_split_library_seqs.fna.gz``

    Headers that do not match the QUAL header at the same position are
    printed to stdout and the record is skipped.

    Args:
        study: study identifier, used to build the output file names.
        study_input_dir: directory holding the ``processed*`` folders.
        zip_fname: not used by this function; kept so existing callers keep
            working.
        files_to_remove: list that the two output paths are appended to
            (mutated in place).
        output_dir: directory the gzip files are written to.

    Returns:
        A ``(files_to_remove, biom_files, samples)`` tuple, where
        ``biom_files`` lists every existing, non-empty
        ``gg_97_otus/exact_uclust_ref_otu_table.biom`` found in a processed
        folder and ``samples`` maps each sequence-name prefix to the number
        of records written for it.

    Raises:
        IOError: if a processed folder lacks ``seqs.fna`` or both quality
            files.
        StopIteration: if a quality file holds fewer records than its
            ``seqs.fna``.
    """
    # define sequence output file
    seq_fname = 'study_%s_split_library_seqs.fastq.gz' % (str(study))
    fna_fname = 'study_%s_split_library_seqs.fna.gz' % (str(study))
    output_seq_fp = join(output_dir, seq_fname)
    output_fna_fp = join(output_dir, fna_fname)

    # add to list of files to remove
    files_to_remove.append(output_seq_fp)
    files_to_remove.append(output_fna_fp)

    samples = {}
    biom_files = []
    seq_counter = 0

    # try/finally instead of ``with``: gzip files only became context
    # managers in Python 2.7
    output_seqs = gzip.open(output_seq_fp, 'w')
    try:
        output_fna = gzip.open(output_fna_fp, 'w')
        try:
            # get a list of all files in study_dir
            for processed_folder in listdir(study_input_dir):
                # only entries starting with the word "processed" are runs
                if not processed_folder.startswith('processed'):
                    continue

                seq_counter = _append_split_lib_run(
                    join(study_input_dir, processed_folder,
                         'split_libraries'),
                    output_seqs, output_fna, samples, seq_counter)

                # get list of biom files
                gg_biom_fp = join(study_input_dir, processed_folder,
                                  'gg_97_otus',
                                  'exact_uclust_ref_otu_table.biom')
                if exists(gg_biom_fp) and getsize(gg_biom_fp) > 0:
                    biom_files.append(gg_biom_fp)
        finally:
            output_fna.close()
    finally:
        output_seqs.close()

    return files_to_remove, biom_files, samples