# raw_info['fetchid'] = raw_info['fetchid'].apply(int)
# this is an UGLY fix that we'd have to implement here just to save everything ...
# Stop-gap: set bad_info['enzyme'] to the constant 'T' whenever args.exp_num is truthy.
# NOTE(review): a later section of this script applies the same kind of override
# only when args.exp_num == '1' -- confirm the looser truthiness test is intended here.
if args.exp_num:
    bad_info['enzyme'] = 'T'
#
# fasta = SeqIO.to_dict(SeqIO.parse(fasta_fname,"fasta"),key_function=lambda _: _.id.split('|')[1])
# 1-BASED NOTATION FOR PROTEINS INDEXING ENFORCED ...
# pep_df = pd.read_csv(uniq_pept_fname)

# connection between peptide info and spectrum info to be established ...
##########################################################################
# unroll that spec table to have 1 deamid per row ...
#
#
#
# Unroll the quantification table via ms.unroll_by_mfunc: the
# ('Modifications', 'Sequence') pair of each row is handed to ms.extract_deamids
# and the outcome is stored under 'deamid_info'.
quant_info_unrolled = ms.unroll_by_mfunc(quant_info,['Modifications','Sequence'],(lambda row: ms.extract_deamids(row[0],row[1])),'deamid_info')
# The 'Prob' column can arrive either as numbers or as text carrying a '%' sign
# (fix introduced August 3 2016), so normalise it to floats in 'pept_ident_probab'.
# The check is on dtype.kind rather than `dtype == 'float'`, because the latter
# matches float64 only: an integer or float32 column would silently leave
# 'pept_ident_probab' undefined.
if quant_info_unrolled['Prob'].dtype.kind in 'iuf':
    # already numeric (signed/unsigned integer or any float width)
    quant_info_unrolled['pept_ident_probab'] = quant_info_unrolled['Prob'].astype(float)
elif quant_info_unrolled['Prob'].dtype.kind == 'O':
    # text such as '95%': drop leading/trailing '%' characters, then parse
    quant_info_unrolled['pept_ident_probab'] = quant_info_unrolled['Prob'].str.strip('%').apply(float)
# any other dtype is left untouched, exactly as before
##########################################################

# so far the following merge seems to be 100% sufficient for the desired final output ...
# we could add on extra features if needed ...
quant_n_raw = quant_info_unrolled[['pept',
                    'deamid_info',
                    'pept_with_mod',
                    'Weight',
                    'spec_name',
# Load the spectrum report; `separator` is the field delimiter passed to read_csv.
spec_info = pd.read_csv(
    spec_fname,
    sep=separator,
)
# Add an upper-cased copy of the reported peptide sequence as 'pept'.
spec_info['pept'] = spec_info['Peptide sequence'].str.upper()
# Run every fetchid value through int().
pep_info['fetchid'] = pep_info['fetchid'].apply(int)
# Stop-gap kept from the original script: for experiment '1' the enzyme
# column is overwritten with the constant 'T'.
if args.exp_num == '1':
    pep_info['enzyme'] = 'T'
#
# fasta = SeqIO.to_dict(SeqIO.parse(fasta_fname,"fasta"),key_function=lambda _: _.id.split('|')[1])
# 1-BASED NOTATION FOR PROTEINS INDEXING ENFORCED ...
# pep_df = pd.read_csv(uniq_pept_fname)

# connection between peptide info and spectrum info to be established ...
##########################################################################
# unroll that spec table to have 1 deamid per row ...
def _percent_to_float(column):
    """Return *column* as floats, accepting numbers or '%'-decorated text.

    A column that is already numeric is cast to float directly; the `.str`
    accessor would raise AttributeError on it. Any other column has
    leading/trailing '%' characters stripped from each value before float().
    """
    if pd.api.types.is_numeric_dtype(column):
        return column.astype(float)
    return column.str.strip('%').apply(float)


# Unroll the spectrum table via ms.unroll_by_mfunc: ms.extract_deamids is applied
# to 'Variable modifications identified by spectrum' and stored as 'deamid_info'.
spec_info_unrolled = ms.unroll_by_mfunc(spec_info,'Variable modifications identified by spectrum',ms.extract_deamids,'deamid_info')
# Numeric versions of the two identification-probability columns.
spec_info_unrolled['prot_ident_probab'] = _percent_to_float(spec_info_unrolled['Protein identification probability'])
spec_info_unrolled['pept_ident_probab'] = _percent_to_float(spec_info_unrolled['Peptide identification probability'])
##########################################################

# so far the following merge seems to be 100% sufficient for the desired final output ...
# we could add on extra features if needed ...
# Columns of the unrolled spectrum table that take part in the merge.
_spec_merge_cols = [
    'pept',
    'deamid_info',
    'prot_ident_probab',
    'pept_ident_probab',
]
# Right join on 'pept': every pep_info row is kept; clashing column names
# coming from pep_info receive the '_x' suffix.
spec_n_pep = spec_info_unrolled[_spec_merge_cols].merge(
    pep_info,
    how='right',
    on='pept',
    suffixes=('', '_x'),
)

# Now, extract those gsites ...
def dg_func(x):
    """Run ms.deamid_to_gsite for one row and wrap the result in a pd.Series.

    The row must provide 'deamid_info', 'start_fetched' and 'fetchid'; the
    sequence is taken from the gbrecs entry keyed by the integer fetchid
    rendered as a string.
    """
    return pd.Series(
        ms.deamid_to_gsite(
            x['deamid_info'],
            x['start_fetched'],
            str(gbrecs[str(int(x['fetchid']))].seq),
        )
    )


# Apply row by row (axis=1) over just the three columns dg_func reads.
gs_res = spec_n_pep[['deamid_info', 'start_fetched', 'fetchid']].apply(dg_func, axis=1)