def read_xcms_peaks(input_file):
    """Read a tab-separated peak table and build one ``Peak`` per data row.

    The first two lines of the file are skipped.  For every remaining,
    non-blank line the columns are used as follows:

    * column 0  -- peak id; surrounding double quotes are removed and the
      value is converted to ``int`` and stored with ``set_xcms_id``
    * column 1  -- mass (converted to ``float``)
    * column 4  -- rt (converted to ``float``)
    * third-to-last and second-to-last columns -- annotation fields; the
      non-empty ones are joined with a single space and used as the tag
      (leading/trailing double quotes removed)
    * last column -- compound group, passed on as a string

    Every peak is created with the name ``'unknown'``.

    :param input_file: path of the peak table to read
    :returns: list of ``Peak`` objects in file order
    """
    # The context manager closes the file even if a row fails to parse.
    with open(input_file, 'r') as fp:
        lines = fp.readlines()

    peak_list = []

    for line in lines[2:]:
        # A trailing blank line would otherwise fail on the column lookups.
        if not line.strip():
            continue

        # Remove the line terminator first so that the last column (the
        # compound group) does not carry a trailing newline.
        parts = line.rstrip('\r\n').split('\t')

        xcms_id = parts[0].strip('"')
        mass = parts[1]
        rt = parts[4]

        # Keep whichever of the two annotation columns are filled in.
        annotations = [field for field in (parts[-3], parts[-2])
                       if field != '']
        tag = ' '.join(annotations).strip('"')

        compound_group = parts[-1]

        peak = Peak(float(mass), float(rt), compound_group=compound_group,
                    tag=tag, name='unknown')
        peak.set_xcms_id(int(xcms_id))

        peak_list.append(peak)

    return peak_list
def read_library_peaks(input_file):
    """Read reference (library) peaks from a delimited text file.

    Two line layouts are accepted:

    * ``name $ rt $ mass``  -- tried first (fields separated by ``$``)
    * ``name , mass , rt``  -- fallback (fields separated by ``,``; note
      that mass and rt are in the opposite order)

    A line that matches neither layout (too few fields or non-numeric
    values) is reported on stdout and skipped.  Every peak gets an empty
    tag added via ``add_tag``.

    :param input_file: path of the reference file
    :returns: list of ``Peak`` objects for the lines that could be parsed
    """
    # The context manager closes the file even if something below raises.
    with open(input_file, 'r') as fp:
        lines = fp.readlines()

    peak_list = []

    for i, line in enumerate(lines):
        parts = [x.strip() for x in line.split('$')]
        try:
            rt = float(parts[1])
            mass = float(parts[2])
        except (IndexError, ValueError):
            # Only parsing failures are handled here, so that e.g.
            # KeyboardInterrupt is no longer swallowed.
            parts = [x.strip() for x in line.split(',')]
            try:
                rt = float(parts[2])
                mass = float(parts[1])
            except (IndexError, ValueError):
                # A parenthesised single-argument print emits the same
                # text whether print is a statement or a function.
                print('Warning: Error reading line %s from reference file' % i)
                print('=====================================================')
                print('Line contents: %s' % line.strip())
                print('')
                continue

        name = parts[0]
        peak = Peak(mass, rt, name=name)
        peak.add_tag('')

        peak_list.append(peak)

    return peak_list
def add_isotope(peak, isotope):
    """Create the ``M+<isotope>`` peak derived from *peak*.

    The new peak keeps the retention time of *peak*, has its mass shifted
    by ``isotope * 1.0086710869``, is tagged ``"M+<isotope>"``, is named
    ``"<parent name> M+<isotope>"`` and records the id number of *peak*
    as its parent.

    :param peak: parent peak (must provide ``get_mass``, ``get_rt``,
        ``get_name`` and ``get_id_num``)
    :param isotope: multiplier applied to the per-step mass shift
    :returns: the newly created ``Peak``
    """
    parent_mass = peak.get_mass()
    parent_rt = peak.get_rt()
    label = "M+" + str(isotope)
    parent_name = peak.get_name()
    parent_id = peak.get_id_num()

    shifted_mass = parent_mass + isotope*1.0086710869

    child = Peak(shifted_mass, parent_rt, '0', label,
                 name=parent_name + ' ' + label)
    child.set_parent(parent_id)
    return child
def get_decoy_peaks(peak, mode):
    """Generate decoy peaks for *peak* from implausible adducts.

    Based on the idea of Palmer, Alexandrov et al: elements of the
    periodic table that do not form real adducts are used to create
    impossible adduct masses.

    * ``mode == 'pos'``: one peak per element at ``mass + element``,
      tagged ``'+<element>'``.
    * ``mode == 'neg'``: one peak per element at
      ``mass - hydrogen + element``, tagged ``'-H+<element>'``.
    * any other mode: an empty list.

    Each decoy keeps the retention time of *peak*, is named
    ``<parent name><tag>`` and has the id number of *peak* as parent.
    """
    parent_mass = peak.get_mass()
    parent_rt = peak.get_rt()
    parent_name = peak.get_name()
    element_masses = {'He':4.002602, 'Be':9.0121831, 'F':18.998403,
                      'Al':26.9815385, 'Sc':44.955908, 'Fe':55.845,
                      'Ge':72.630, 'Sr':87.62}
    hydrogen = 1.0078250

    parent_id = peak.get_id_num()

    def _make_decoy(decoy_mass, label):
        # Build one decoy peak and link it back to its parent.
        decoy = Peak(decoy_mass, parent_rt, '0', tag=label,
                     name=parent_name+label)
        decoy.set_parent(parent_id)
        return decoy

    if mode == 'pos':
        return [_make_decoy(parent_mass + delta, '+'+element)
                for element, delta in element_masses.iteritems()]

    if mode == 'neg':
        return [_make_decoy(parent_mass - hydrogen + delta, '-H+'+element)
                for element, delta in element_masses.iteritems()]

    return []
def read_peaks(infile):
    """Read a comma-separated peak/intensity matrix, one ``Peak`` per column.

    The first field of every row is a row label.  Rows are interpreted by
    position:

    * row 0    -- names (``metNames``), one per peak
    * row 1    -- m/z values, one per peak
    * row 2    -- rt values, one per peak
    * rows 3-4 -- ignored
    * rows 5+  -- one row per file: the label is the file name, the
      remaining fields are that file's intensity for each peak

    Each peak is built as
    ``Peak(column_index, mz, rt, file_names, intensities, name)`` where
    ``intensities`` holds that peak's value in every file, in row order.

    NOTE(review): this file defines another ``read_peaks`` further down;
    if both live in the same module the later definition rebinds the name
    and this one becomes unreachable -- confirm which one is intended.

    :param infile: path of the CSV file
    :returns: list of ``Peak`` objects, one per peak column
    """
    fnames = []
    ints = []

    # The context manager closes the file even if a value fails to parse.
    with open(infile, 'r') as fp:
        for i, raw in enumerate(fp):
            fields = raw.strip().split(',')
            if i == 0:
                metNames = [str(x) for x in fields[1:]]
            elif i == 1:
                mzs = [float(x) for x in fields[1:]]
            elif i == 2:
                rts = [float(x) for x in fields[1:]]
            elif i > 4:
                fnames.append(str(fields[0]))
                ints.append([float(x) for x in fields[1:]])

    # Transpose from one row per file to one row per peak.  A list
    # comprehension stays subscriptable regardless of what map() returns.
    ints = [list(column) for column in zip(*ints)]

    peaks = []
    for i, mz in enumerate(mzs):
        peaks.append(Peak(i, mz, rts[i], fnames, ints[i], metNames[i]))

    return peaks
def get_adduct_peaks(peak, mode):
    """Return one ``Peak`` for every known adduct of *peak* in *mode*.

    Adducts are from CAMERA paper supplemental material.

    For each adduct the mass is ``mass + shift``; adducts whose label
    contains ``'2M'`` use ``2*mass + shift`` instead, and adducts whose
    label contains ``'(2+)'`` have the result halved.  Every new peak
    keeps the retention time of *peak*, is tagged with the adduct label
    and is named ``"<parent name> <adduct label>"``.

    :param peak: source peak (must provide ``get_mass``, ``get_rt`` and
        ``get_name``)
    :param mode: ``'pos'`` or ``'neg'``
    :returns: list of adduct peaks; an empty list (after printing an
        error message) when *mode* is neither ``'pos'`` nor ``'neg'``
    """
    positive_dict = {'M+H':1.0078250, 'M+Na':22.989769, 'M+K':39.0983,
                     'M+NH4':18.03773, 'M+2H(2+)':2.01565,
                     'M+Na+HCOOH':69.013079, 'M+H+K(2+)':40.106125,
                     '2M+Na':22.989769, 'M+K+HCOOH':85.12161,
                     '2M+K':39.0983}

    negative_dict = {"M-H":-1.0078250,"M-H+NaCOOH":66.997429,
                     "M-2H+Na":20.974119,"2M-2H+Na":20.974119,
                     "M-H+HCOOH":45.015485, "2M-H":-1.0078250,
                     "2M-2H+K":37.08265, "M-2H+K":37.08265}

    if mode == 'pos':
        adducts = positive_dict
    elif mode == 'neg':
        adducts = negative_dict
    else:
        # Parenthesised single-argument print emits the same text whether
        # print is a statement or a function.
        print("Error: mode must be 'pos' or 'neg'")
        return []

    mass = peak.get_mass()
    rt = peak.get_rt()
    name = peak.get_name()

    peaks = []
    for key, value in adducts.items():
        # Dimer adducts ('2M...') start from twice the parent mass.
        base_mass = 2*mass if '2M' in key else mass
        new_mass = base_mass + value
        # Labels containing '(2+)' get their mass halved.
        if '(2+)' in key:
            new_mass = new_mass/2
        peaks.append(Peak(new_mass, rt, '0', tag=key, name=name+' '+key))

    return peaks
def get_adduct_peak(peak, adduct, mode):
    """Return the ``Peak`` for a single named *adduct* of *peak*.

    Adducts are from CAMERA paper supplemental material.

    The new mass is ``mass + shift``; adducts whose label contains
    ``'2M'`` use ``2*mass + shift`` instead, and adducts whose label
    contains ``'(2+)'`` have the result halved.  The new peak keeps the
    retention time of *peak* and is tagged with the adduct label.

    :param peak: source peak (must provide ``get_mass`` and ``get_rt``)
    :param adduct: adduct label, e.g. ``'M+H'`` or ``'M-H'``; it must be
        one of the adducts defined for *mode*
    :param mode: ``'pos'`` or ``'neg'``
    :returns: the adduct ``Peak``
    :raises ValueError: if *mode* is neither ``'pos'`` nor ``'neg'``
    :raises KeyError: if *adduct* is not defined for *mode*
    """
    positive_dict = {'M+H':1.0078250, 'M+Na':22.989769, 'M+K':39.0983,
                     'M+NH4':18.03773, 'M+2H(2+)':2.01565,
                     'M+Na+HCOOH':69.013079, 'M+H+K(2+)':40.106125,
                     '2M+Na':22.989769, 'M+K+HCOOH':85.12161,
                     '2M+K':39.0983}

    negative_dict = {"M-H":-1.0078250,"M-H+NaCOOH":66.997429,
                     "M-2H+Na":20.974119,"2M-2H+Na":20.974119,
                     "M-H+HCOOH":45.015485, "2M-H":-1.0078250,
                     "2M-2H+K":37.08265, "M-2H+K":37.08265}

    if mode == 'pos':
        adducts = positive_dict
    elif mode == 'neg':
        adducts = negative_dict
    else:
        # Fail with a clear error instead of an unbound local further down.
        print("Error: mode must be 'pos' or 'neg'")
        raise ValueError("mode must be 'pos' or 'neg', got %r" % (mode,))

    # Test membership against the table for this mode.  Comparing against
    # a list of the two key lists never matches, so the message would be
    # printed on every call.
    if adduct not in adducts:
        print("Error, adduct not found")
        raise KeyError(adduct)

    mass = peak.get_mass()
    rt = peak.get_rt()

    # Dimer adducts ('2M...') start from twice the parent mass.
    base_mass = 2*mass if '2M' in adduct else mass
    new_mass = base_mass + adducts[adduct]
    # Labels containing '(2+)' get their mass halved.
    if '(2+)' in adduct:
        new_mass = new_mass/2

    new_peak = Peak(new_mass, rt, '0', adduct)

    return new_peak
def read_peaks(infile):
    """Read a tab-separated peak table into ``Peak`` objects.

    The first line of the file is skipped as a header.  From every
    following line these columns are used:

    * column 0      -- peak index; surrounding double quotes are removed
      and the value is converted to ``int``
    * column 1      -- m/z (``float``)
    * column 4      -- rt (``float``)
    * columns 11-15 -- five area values (``float``)

    Each peak is built as ``Peak(index, mz, rt, areas)``.

    NOTE(review): this file also contains an earlier ``read_peaks`` with a
    different input format; if both live in the same module this later
    definition is the one the name refers to -- confirm that is intended.

    :param infile: path of the peak table
    :returns: list of ``Peak`` objects in file order
    """
    # The context manager closes the file even if a row fails to parse.
    with open(infile, 'r') as fp:
        rows = fp.readlines()[1:]

    peaks = []

    for line in rows:
        parts = line.split('\t')

        # One pass per row is enough: the values depend only on fixed
        # column positions, not on each individual field.
        index = int(parts[0].strip('"'))
        mz = float(parts[1])
        rt = float(parts[4])
        areas = [float(parts[i]) for i in range(11, 16)]

        peaks.append(Peak(index, mz, rt, areas))

    return peaks