def antibodies(self):
    """Return the antibody names found in the header row of the CSV file.

    The CSV dialect is sniffed from the first ``MonogramData.__sample_len``
    characters of the file. Every header cell after the first column is
    returned with surrounding whitespace stripped.

    Returns:
        A list of header cell strings (empty if the reader yields no rows).

    Raises:
        ValueError: if the sniffer does not detect a header row.
    """
    with open(self.__csvfile) as handle:
        head = handle.read(MonogramData.__sample_len)
        detector = csv_sniffer()
        dialect = detector.sniff(head)
        if not detector.has_header(head):
            raise ValueError(MonogramData.__no_header_msg)
        # rewind: the sample read above consumed the start of the file
        handle.seek(0)
        # only the header row is needed, so pull a single row and stop
        header = next(iter(csv_reader(handle, dialect)), None)
        if header is None:
            return []
        # the first column is skipped; everything after it is a name
        return [cell.strip() for cell in header[1:]]
def seqrecords(self, antibodies, clonal=False):
    """Load the FASTA records and attach the IC50 values read from the CSV.

    Records are parsed from ``self.__fastafile`` (first verified against
    ``DNAAlphabet``, then against ``AminoAlphabet`` if that raises
    ``VerifyError``). A trailing ``_N`` / ``-N`` suffix on each record id and
    on each CSV accession is normalised to ``_N`` before the two are matched.
    Records with no parsable value for the requested antibody are dropped
    with a warning.

    NOTE(review): another ``seqrecords`` definition follows this one in the
    file; if both live in the same class the later one shadows this one.

    Args:
        antibodies: sequence of at most one antibody (CSV column) name.
        clonal: must be falsy; accepted only for interface compatibility.

    Returns:
        A ``(seqrecords, clonal, antibodies)`` tuple, where ``seqrecords``
        is the list of annotated records that had at least one value.

    Raises:
        ValueError: if ``clonal`` is truthy, more than one antibody is
            given, the CSV has no detectable header, or a requested
            antibody is not a column of the header row.
    """
    if clonal:
        raise ValueError(
            'clonal property is not available with Monogram datasets')

    if len(antibodies) > 1:
        raise ValueError(
            'only one antibody can be interrogated with Monogram datasets')

    with open(self.__fastafile) as h:
        source = Verifier(SeqIO.parse(h, 'fasta'), DNAAlphabet)
        try:
            seqrecords = list(source)
        except VerifyError:
            # not verifiable as DNA: retry with the amino-acid alphabet
            source.set_alphabet(AminoAlphabet)
            seqrecords = list(source)

    # rewrite a trailing "-123" or "_123" as "_123" so ids compare equal
    underdash = re_compile(r'[_-](\d+)$')
    for r in seqrecords:
        r.id = underdash.sub(r'_\1', r.id)

    ic50s = dict((r.id, []) for r in seqrecords)

    with open(self.__csvfile) as fh:
        sample = fh.read(MonogramData.__sample_len)
        sniffer = csv_sniffer()
        dialect = sniffer.sniff(sample)
        if not sniffer.has_header(sample):
            raise ValueError(MonogramData.__no_header_msg)
        fh.seek(0)
        reader = csv_reader(fh, dialect)
        columns = None
        for row in reader:
            if columns is None:
                # header row: map stripped column name -> column index
                columns = dict((v.strip(), j) for j, v in enumerate(row))
                missing = set(antibodies) - set(columns)
                if missing:
                    raise ValueError("antibodies ('%s') not found!" % "', '".join(missing))
                continue
            if not row:
                # blank lines come back as empty rows; there is no
                # accession to read, so skip instead of raising IndexError
                continue
            acc = underdash.sub(r'_\1', row[0])
            if acc not in ic50s:
                continue
            try:
                # values may carry a leading '<' or '>' qualifier
                cln_ic50s = [
                    float(row[columns[ab]].strip().lstrip('<>'))
                    for ab in antibodies
                    if ab in columns and columns[ab] < len(row)
                ]
            except ValueError:
                # unparsable cell: leave this row out (best effort), but
                # do not swallow unrelated errors or KeyboardInterrupt
                continue
            ic50s[acc].extend(cln_ic50s)

    kept = []
    for r in seqrecords:
        if not ic50s.get(r.id):
            warn("skipping sequence '%s', VALUE not found" % r.id)
            continue
        values = {'IC50': ic50s[r.id]}
        r.description = json_dumps({
            'ab': antibodies[0],
            'values': values
        })
        r.annotations['antibody'] = values
        kept.append(r)
    seqrecords = kept

    return seqrecords, clonal, antibodies
def seqrecords(self, antibodies, clonal=False):
    """Return the FASTA records annotated with IC50 values from the CSV.

    The FASTA file is read through ``Verifier`` using ``DNAAlphabet``,
    falling back to ``AminoAlphabet`` when ``VerifyError`` is raised. Record
    ids and CSV accessions both have a trailing ``_N`` / ``-N`` suffix
    rewritten to ``_N`` so that they can be matched. Each matched record gets
    a JSON ``description`` and an ``annotations['antibody']`` entry; records
    without any parsable value are dropped with a warning.

    NOTE(review): an earlier ``seqrecords`` definition precedes this one in
    the file; if both live in the same class this one is the one in effect.

    Args:
        antibodies: sequence of at most one antibody (CSV column) name.
        clonal: must be falsy; accepted only for interface compatibility.

    Returns:
        A ``(seqrecords, clonal, antibodies)`` tuple.

    Raises:
        ValueError: if ``clonal`` is truthy, more than one antibody is
            given, the CSV has no detectable header, or a requested
            antibody is not a column of the header row.
    """
    if clonal:
        raise ValueError('clonal property is not available with Monogram datasets')
    if len(antibodies) > 1:
        raise ValueError('only one antibody can be interrogated with Monogram datasets')

    with open(self.__fastafile) as h:
        source = Verifier(SeqIO.parse(h, 'fasta'), DNAAlphabet)
        try:
            seqrecords = list(source)
        except VerifyError:
            # verification as DNA failed: re-read as amino acids
            source.set_alphabet(AminoAlphabet)
            seqrecords = list(source)

    # a trailing "-123" or "_123" is canonicalised to "_123"
    underdash = re_compile(r'[_-](\d+)$')
    for r in seqrecords:
        r.id = underdash.sub(r'_\1', r.id)
    ic50s = dict((r.id, []) for r in seqrecords)

    with open(self.__csvfile) as fh:
        sample = fh.read(MonogramData.__sample_len)
        sniffer = csv_sniffer()
        dialect = sniffer.sniff(sample)
        if not sniffer.has_header(sample):
            raise ValueError(MonogramData.__no_header_msg)
        fh.seek(0)

        rows = iter(csv_reader(fh, dialect))
        header = next(rows, None)
        if header is not None:
            # stripped column name -> column index
            columns = dict((v.strip(), j) for j, v in enumerate(header))
            missing = set(antibodies) - set(columns)
            if missing:
                raise ValueError("antibodies ('%s') not found!" % "', '".join(missing))
            # every requested antibody is a known column at this point,
            # so resolve the indices once instead of once per row
            indices = [columns[ab] for ab in antibodies]

            for row in rows:
                if not row:
                    # a blank line yields an empty row with no accession;
                    # skip it rather than fail on row[0]
                    continue
                acc = underdash.sub(r'_\1', row[0])
                if acc not in ic50s:
                    continue
                try:
                    # strip a leading '<' or '>' qualifier before parsing
                    parsed = [
                        float(row[j].strip().lstrip('<>'))
                        for j in indices
                        if j < len(row)
                    ]
                except ValueError:
                    # non-numeric cell: ignore this row, but let any
                    # other kind of failure propagate
                    continue
                ic50s[acc].extend(parsed)

    annotated = []
    for r in seqrecords:
        measured = ic50s.get(r.id)
        if measured:
            values = {'IC50': measured}
            r.description = json_dumps({
                'ab': antibodies[0],
                'values': values
            })
            r.annotations['antibody'] = values
            annotated.append(r)
        else:
            warn("skipping sequence '%s', VALUE not found" % r.id)

    return annotated, clonal, antibodies