def alignAcross(data, field_map, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields column wise

    Every input field in field_map is aligned on its own: the values of that
    field from all records in the group are passed to runMuscle together, and
    each aligned sequence is stored back on its record under the mapped
    output field name.

    Arguments:
      data : DbData object with Receptor objects to process.
      field_map : a dictionary of {input sequence : output sequence} field names to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
                                         valid is False when the group id is None or when
                                         runMuscle returns None for any field.
    """
    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    for in_field in list(field_map.keys()):
        # One SeqRecord per receptor; spaces in the identifier are replaced so
        # the same key can be used to find the record in the alignment.
        records = [
            SeqRecord(rec.getSeq(in_field), id=rec.sequence_id.replace(' ', '_'))
            for rec in data.data
        ]
        alignment = runMuscle(records, aligner_exec=muscle_exec)

        # A failed alignment invalidates the result, but the remaining
        # fields are still attempted.
        if alignment is None:
            result.valid = False
            continue

        out_field = field_map[in_field]
        aligned_by_id = {entry.id: entry for entry in alignment}
        for rec in result.results:
            aligned_seq = str(aligned_by_id[rec.sequence_id.replace(' ', '_')].seq)
            rec.annotations[out_field] = aligned_seq
            result.log['%s-%s' % (in_field, rec.sequence_id)] = aligned_seq

    return result
def alignBlocks(data, seq_fields, muscle_exec=default_muscle_exec):
    """
    Multiple aligns blocks of sequence fields together

    All requested fields from all records in the group are submitted to
    runMuscle in a single call. Each aligned sequence is written back to its
    record's annotations under '<field>_ALIGN' and recorded in the result log
    under '<field>-<record id>'.

    Arguments:
      data : a DbData object with IgRecords to process.
      seq_fields : the sequence fields to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing IgRecords with multiple aligned sequence fields.
                                         valid is False when the group id is None or when
                                         runMuscle returns None.
    """
    def _alignment_id(rec, field):
        # Identifier used only to match input sequences to aligned output.
        # Whitespace is replaced because FASTA-style identifiers end at the
        # first space; alignAcross sanitizes its identifiers the same way.
        # NOTE(review): assumes runMuscle round-trips ids through FASTA - confirm.
        return ('%s_%s' % (rec.id, field)).replace(' ', '_')

    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    # Field-major ordering: all records for the first field, then the next.
    seq_list = [SeqRecord(r.getSeqField(f), id=_alignment_id(r, f))
                for f in seq_fields
                for r in data.data]
    seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)

    if seq_aln is None:
        result.valid = False
        return result

    aligned_by_id = {x.id: x for x in seq_aln}
    for r in result.results:
        for f in seq_fields:
            seq = str(aligned_by_id[_alignment_id(r, f)].seq)
            r.annotations['%s_ALIGN' % f] = seq
            # Log keys keep the unmodified record id.
            result.log['%s-%s' % (f, r.id)] = seq

    return result
def alignWithin(data, field_map, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields within a row

    The input fields of a single record are aligned against each other, and
    each aligned sequence is stored on the record under the mapped output
    field name and in the result log under the input field name.

    Arguments:
      data : DbData object with Receptor objects to process.
      field_map : a dictionary of {input sequence : output sequence} field names to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
                                         valid is False when the id is None or when
                                         runMuscle returns None.
    """
    # NOTE(review): this file contains a second, later definition named
    # alignWithin (taking seq_fields); at import time that later definition
    # rebinds the name - confirm which one is intended to be live.

    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    # data.data is used as a single record here, not iterated.
    receptor = data.data
    in_fields = list(field_map.keys())

    # The field name itself serves as the sequence identifier.
    records = [SeqRecord(receptor.getSeq(name), id=name) for name in in_fields]
    alignment = runMuscle(records, aligner_exec=muscle_exec)

    if alignment is None:
        result.valid = False
        return result

    aligned_by_field = {entry.id: entry for entry in alignment}
    for name in in_fields:
        aligned_seq = str(aligned_by_field[name].seq)
        receptor.annotations[field_map[name]] = aligned_seq
        result.log[name] = aligned_seq

    return result
def alignWithin(data, seq_fields, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields within a row

    The requested fields of a single record are aligned against each other.
    Each aligned sequence is stored in the record's annotations under
    '<field>_ALIGN' and in the result log under the field name.

    Arguments:
      data : a DbData object with an IgRecords to process.
      seq_fields : the sequence fields to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing IgRecords with multiple aligned sequence fields.
                                         valid is False when the id is None or when
                                         runMuscle returns None.
    """
    # NOTE(review): an earlier definition named alignWithin (taking field_map)
    # appears in this file; being defined later, this version is the one the
    # name ends up bound to - confirm that is intended.

    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    # data.data is used as a single record here, not iterated.
    row = data.data

    # The field name itself serves as the sequence identifier.
    records = [SeqRecord(row.getSeqField(name), id=name) for name in seq_fields]
    alignment = runMuscle(records, aligner_exec=muscle_exec)

    if alignment is None:
        result.valid = False
        return result

    aligned_by_field = {entry.id: entry for entry in alignment}
    for name in seq_fields:
        aligned_seq = str(aligned_by_field[name].seq)
        row.annotations['%s_ALIGN' % name] = aligned_seq
        result.log[name] = aligned_seq

    return result