Exemple #1
0
    def format(self, new_file_name=None):
        '''Rewrite the CSV file at self.orig_file as a report.

        The header row is stripped of surrounding whitespace and passed
        through self.convert_headers.  Every data row then has its
        'Spectrum Description', 'Experimental mz', 'Peptide Sequence' and
        'Variable Modifications' cells rewritten with the convert_* helpers
        before all rows are written through mz.Report.writer.

        new_file_name -- path of the report to write.  When omitted (or
            empty) the report is placed in the directory of the original
            file, with "mz_" prepended to the original file name.

        The index() lookups below fail if the converted headers lack one of
        the four columns named above (ValueError, assuming convert_headers
        returns a list -- TODO confirm).
        '''
        new_data = []

        # The context manager closes the input file even when a conversion
        # helper raises part-way through the file.
        with open(self.orig_file) as fin:
            csv_reader = csv.reader(fin)

            headers = [header.strip() for header in next(csv_reader)]
            new_headers = self.convert_headers(headers)

            # Column positions carry a _col suffix so that none of them
            # shadows the outer name `mz`, which is needed further down to
            # create the report writer.
            spec_desc_col = new_headers.index('Spectrum Description')
            mz_col = new_headers.index('Experimental mz')
            charge_col = new_headers.index('Charge')
            seq_col = new_headers.index('Peptide Sequence')
            var_mods_col = new_headers.index('Variable Modifications')

            for prerow in csv_reader:
                # A real list (not a lazy map object) is required because
                # individual cells are overwritten in place.
                row = [cell.strip() for cell in prerow]
                row[spec_desc_col] = self.convert_spectrum(row[spec_desc_col])
                row[mz_col] = self.convert_mass(row[mz_col], row[charge_col])
                # convert_seq yields both the new sequence cell and the
                # value for the variable-modifications cell.
                (the_seq, the_mods) = self.convert_seq(row[seq_col])
                row[seq_col] = the_seq
                row[var_mods_col] = the_mods
                new_data.append(row)

        if not new_file_name:
            dir_split = os.path.split(self.orig_file)
            new_file_name = os.path.join(dir_split[0], "mz_" + dir_split[1])

        report = mz.Report.writer(new_file_name, columns=new_headers)
        try:
            for data in new_data:
                report.write(data)
        finally:
            # Release the writer even if one of the writes fails.
            report.close()
Exemple #2
0
def format_XML(xTandem_file, save_file, rev_mods=None, db_regex=None):
    '''Create a report from the xTandem output (which is an XML file)

    xTandem_file, rev_mods and db_regex are forwarded unchanged to
    xTandemArray, which supplies the rows to write.

    save_file is the path of the report.  Any file already present at that
    path is deleted before the report writer is created.
    '''

    rows = xTandemArray(xTandem_file, rev_mods, db_regex)

    if os.path.exists(save_file):
        os.remove(save_file)

    report = mz.Report.writer(save_file, columns=['Expect'],
                                         default_columns=True)

    try:
        for row in rows:
            report.write(row)
    finally:
        # Close the writer even when a write (or the row source) raises, so
        # the report file handle is not leaked.
        report.close()
Exemple #3
0
    def format(self, new_file_name=None):
        '''Rewrite the tab-delimited file at self.orig_file as a report.

        The report columns are multiplierz.mzReport.default_columns without
        'Peptide Rank' and 'Query', plus 'MS2 Time', 'Protein Coverage' and
        every source header that is not a key of self.reps; 'Unused',
        'Contrib' and 'Sc' are dropped.

        For each data line:
          * a source column that is a key of self.reps is stored under the
            column name self.reps gives for it, other columns are copied
            under their own name when that name is a report column;
          * 'Modifications' is converted into 'Variable Modifications';
          * 'Missed Cleavages' is the number of times 'missed' occurs in
            the 'Cleavages' field;
          * 'Spectrum Description' is built from 'Spectrum' and 'Time'.

        'Protein Matches' is filled in afterwards with the number of data
        lines that share the row's accession value.

        new_file_name -- path of the report to write.  When omitted (or
            empty) the report is placed in the directory of the original
            file, with "mz_" prepended to the original file name.
        '''
        ignored_headers = ('Unused', 'Contrib', 'Sc')

        rows = []
        protein_matches = defaultdict(int)

        # The context manager closes the input even if a line is malformed
        # or a conversion helper raises.
        with open(self.orig_file) as fh:
            #Change header names
            header_line = fh.readline()
            headers = header_line.strip().split('\t')

            new_headers = multiplierz.mzReport.default_columns[:]
            new_headers.extend(('MS2 Time', 'Protein Coverage'))
            new_headers.remove('Peptide Rank')
            new_headers.remove('Query')
            new_headers.extend(h for h in headers if h not in self.reps)
            for name in ignored_headers:
                if name in new_headers:
                    new_headers.remove(name)

            for line in fh:
                # Strip only the line terminator.  Slicing off the last
                # character would eat real data when the final line has no
                # trailing newline and would leave a stray '\r' on CRLF
                # input.
                line = line.rstrip('\r\n')
                if not line:
                    # An empty line carries no fields at all; skip it
                    # rather than fail on the 'Accessions' lookup below.
                    continue

                new_data = dict.fromkeys(new_headers)
                data = dict(zip(headers, line.split('\t')))

                protein_matches[data['Accessions']] += 1

                for h in data:
                    if h in self.reps and self.reps[h] in new_data:
                        new_data[self.reps[h]] = data[h]
                    elif h in new_data:
                        new_data[h] = data[h]
                    elif h not in ignored_headers:
                        logger_message(10, 'Missing key: %s' % h)

                if 'Modifications' in data:
                    new_data['Variable Modifications'] = self.convert_var_mod(data['Modifications'])

                if 'Cleavages' in data:
                    new_data['Missed Cleavages'] = data['Cleavages'].count('missed')

                if 'Time' in data and 'Spectrum' in data:
                    new_data['Spectrum Description'] = self.convert_spectrum(data['Spectrum'], data['Time'])

                rows.append(new_data)

        # The counts are only complete once every line has been read, so
        # this needs a second pass.  The lookup uses the row's
        # 'Accession Number', while the counts are keyed by the source
        # 'Accessions' field -- presumably self.reps maps one onto the
        # other; verify against the class definition.
        for row in rows:
            row['Protein Matches'] = protein_matches[row['Accession Number']]

        if not new_file_name:
            dir_split = os.path.split(self.orig_file)
            new_file_name = os.path.join(dir_split[0], "mz_" + dir_split[1])

        report = mz.Report.writer(new_file_name, columns=new_headers)
        try:
            for row in rows:
                report.write(row)
        finally:
            # Release the writer even if one of the writes fails.
            report.close()