# Sanity check: all of these columns must be present in the header.
assert all(
    column in header
    for column in (
        "ProteinName",
        "rt_calibrated",
        "charge",
        "product",
        "library_intensity",
    )
)

# Bucket the csv rows by group id: {group_id: [row, row, ...]}
tr_group = {}
for line in lines:
    tr_group.setdefault(line[header_d["group_id"]], []).append(line)

# Walk over every group and build one sptxt spectrum per group.
l = speclib_db_lib.Library(1)
cnt = 0
for key, group in tr_group.items():
    # Spectrum-level fields are all read from the first row of the group.
    firstline = group[0]

    spectrum = speclib_db_lib.Spectra()
    # The group id uses '.' as separator; the spectrum name uses '/'.
    spectrum.name = firstline[header_d["group_id"]].replace('.', '/')
    spectrum.LibID = 1
    # MW and precursorMZ are both filled from the "precursor" column.
    spectrum.MW = firstline[header_d["precursor"]]
    spectrum.precursorMZ = firstline[header_d["precursor"]]
    spectrum.status = "Normal"
    spectrum.full_name = firstline[header_d["group_id"]].replace('.', '/')
    spectrum.sequence = firstline[header_d["PeptideSequence"]]
    # One peak per row in the group.
    spectrum.number_peaks = len(group)

    # NOTE(review): within the visible code the spectrum is built but not
    # stored or written anywhere -- confirm this is intended.
def main(argv):
    """Split .sptxt spectral libraries into one file per retention-time cluster.

    Command line (``argv`` excludes the program name):

    * ``-h`` / ``--help``: print usage and exit.
    * ``-d`` / ``--distance VALUE``: float handed to ``clusterRT``
      (default ``1.0``).
    * ``-a`` / ``--algorithm NAME``: handed to ``clusterRT`` as ``algorithm``
      (default ``False``).
    * ``-i`` / ``-t``: accepted with a value for compatibility, but ignored.
    * remaining arguments: glob patterns selecting ``.sptxt`` files.

    For every matched file ``<stem>.sptxt`` the spectra of each peptide
    sequence are clustered by iRT (or RetTime when no iRT is present) and
    written to ``<stem>_1.sptxt`` ... ``<stem>_N.sptxt``, where N is the
    largest number of clusters found for any sequence. The ``Protein=`` value
    in each spectrum's ``Comment:`` line is prefixed with
    ``Subgroup_<cluster index>_`` (the index is zero-based).

    Exits with status 2 on bad options, on a file without the ``.sptxt``
    extension, on a missing file, or on a spectrum without RT/iRT.
    """
    distance = 1.0
    algorithm = False

    # Long options that take a value need a trailing '='; without it getopt
    # hands back an empty argument and float('') fails for --distance.
    try:
        opts, args = getopt.getopt(
            argv, "hd:i:t:a:", ["help", "distance=", "algorithm="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-d", "--distance"):
            distance = float(arg)
        elif opt in ("-a", "--algorithm"):
            algorithm = arg

    # getopt already returns the positional arguments; counting consumed
    # tokens by hand breaks for "-d2.0", "--distance=2" and the ignored
    # -i / -t options.
    sptxtfiles = []
    for pat in args:
        sptxtfiles.extend(glob.glob(pat))

    for sptxtfile in sptxtfiles:
        print("Reading : " , sptxtfile)
        # Explicit check instead of assert: asserts vanish under "python -O",
        # and the output names below rely on stripping exactly ".sptxt".
        if not sptxtfile.endswith('.sptxt'):
            print("The file: %s does not have the .sptxt extension!" % sptxtfile)
            sys.exit(2)
        if not os.path.exists(sptxtfile):
            print("The file: %s does not exist!" % sptxtfile)
            sys.exit(2)

        library_key = 99
        spectrastlib = speclib_db_lib.Library(library_key)

        peptide_spectra = _read_retention_times(spectrastlib, sptxtfile)

        print("cluster spectra by iRTs...")
        peptide_spectra_cl, max_num_of_clusters = _assign_clusters(
            peptide_spectra, distance, algorithm)

        print("%s files will be created." % max_num_of_clusters)
        _write_split_files(
            spectrastlib, sptxtfile, peptide_spectra_cl, max_num_of_clusters)

        print("done.")


def _read_retention_times(spectrastlib, sptxtfile):
    """Return ``{sequence: {spectrum_offset: rt}}`` for every spectrum read."""
    # e.g. { "SEQUEN[Pho]CE" : {offset1 : RT1, offset2 : RT2},
    #        "SEQUENCE"      : {offset3 : RT3, ...}, ... }
    peptide_spectra = {}

    offset = spectrastlib.get_first_offset(sptxtfile)
    last_offset = -100
    # Reading stops once the offset no longer advances by more than 10.
    while offset - last_offset > 10:
        last_offset = offset
        offset, spectrum = spectrastlib.read_sptxt_with_offset(sptxtfile, offset)

        # Spectrum names look like "SEQUENCE/charge"; only the sequence
        # part is used for grouping.
        sequence = spectrum.name.split('/')[0]

        # iRT takes precedence over the plain retention time.
        if spectrum.iRT_detected:
            rt = spectrum.iRT
        elif spectrum.RetTime_detected:
            rt = spectrum.RetTime
        else:
            print("No RT/iRT was detected for %s" % spectrum.name)
            sys.exit(2)

        peptide_spectra.setdefault(sequence, {})[last_offset] = rt

    return peptide_spectra


def _assign_clusters(peptide_spectra, distance, algorithm):
    """Cluster each sequence's RTs; return (cluster index map, max cluster count)."""
    max_num_of_clusters = 0
    peptide_spectra_cl = {}

    for sequence, spectra in peptide_spectra.items():
        print(sequence)
        rt_clusters = clusterRT(list(spectra.values()), distance, algorithm = algorithm)
        max_num_of_clusters = max(max_num_of_clusters, len(rt_clusters))

        assignments = {}
        for spectrum_offset, rt in spectra.items():
            # The last cluster containing this rt wins; -1 means no match.
            # NOTE(review): a -1 index later selects the *last* output file
            # (negative list indexing) -- confirm clusterRT always returns
            # every input value.
            cl_index = -1
            for index, cluster in enumerate(rt_clusters):
                # A cluster may be a bare value rather than a list.
                members = cluster if isinstance(cluster, list) else [cluster]
                if rt in members:
                    cl_index = index
            assignments[spectrum_offset] = cl_index

        peptide_spectra_cl[sequence] = assignments

    return peptide_spectra_cl, max_num_of_clusters


def _write_split_files(spectrastlib, sptxtfile, peptide_spectra_cl, num_files):
    """Write every spectrum to the split file that matches its cluster index."""
    stem = sptxtfile[:-6]  # strip ".sptxt"
    splitfiles = []
    try:
        for x in range(num_files):
            splitfiles.append(open(stem + "_" + str(x + 1) + ".sptxt", 'w'))

        # Start every split file with the header of the original file.
        original_header = spectrastlib.get_fileheader(sptxtfile)
        for out in splitfiles:
            out.writelines(original_header)

        for spectra in peptide_spectra_cl.values():
            for spectrum_offset, cl_index in spectra.items():
                out = splitfiles[cl_index]
                raw_spectrum = spectrastlib.get_rawspectrum_with_offset(
                    sptxtfile, spectrum_offset)
                for line in raw_spectrum:
                    if line.startswith('Comment:'):
                        line = _tag_protein_name(line, cl_index)
                    out.write(line)
    finally:
        # Close whatever was opened, even if reading or writing failed.
        for out in splitfiles:
            out.close()


def _tag_protein_name(line, cl_index):
    """Prefix the ``Protein=`` value of a Comment line with the subgroup tag."""
    # Without a Protein= entry the line is passed through unchanged, so the
    # spectrum record is still written completely instead of being cut short.
    if not re.search(r'Protein=(.*?)\s', line):
        return line
    split_idx = line.index('Protein=') + len('Protein=')
    return line[:split_idx] + "Subgroup_" + str(cl_index) + "_" + line[split_idx:]