def alignAcross(data, field_map, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields column wise

    Each input field in field_map is aligned on its own: the values of that
    field from every record in data.data are passed to runMuscle together, and
    each record then receives its aligned sequence under the mapped output
    field name. The records are modified in place.

    Arguments:
      data : DbData object with Receptor objects to process.
      field_map : a dictionary of {input sequence : output sequence} field names to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
                                         valid is False when data.id is None (log is then None)
                                         or when runMuscle returns None for any field.
    """
    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    # Alignment record IDs are the sequence IDs with spaces replaced by
    # underscores; they do not depend on the field, so build them once.
    records = data.data
    aln_ids = [r.sequence_id.replace(' ', '_') for r in records]

    for in_field, out_field in field_map.items():
        seq_list = [SeqRecord(r.getSeq(in_field), id=aln_id)
                    for r, aln_id in zip(records, aln_ids)]
        seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
        if seq_aln is None:
            # A failed alignment invalidates the group, but the remaining
            # fields are still attempted.
            result.valid = False
            continue

        # Aligned rows are matched back to their records by ID, not by position
        aligned = {x.id: str(x.seq) for x in seq_aln}
        for r, aln_id in zip(records, aln_ids):
            seq = aligned[aln_id]
            r.annotations[out_field] = seq
            result.log['%s-%s' % (in_field, r.sequence_id)] = seq

    return result
Exemple #2
0
def alignBlocks(data, seq_fields, muscle_exec=default_muscle_exec):
    """
    Multiple aligns blocks of sequence fields together

    Every requested field of every record in data.data is submitted to a single
    runMuscle call. Each aligned sequence is then stored on its record under
    the annotation '<field>_ALIGN'. The records are modified in place.

    Alignment record IDs are built as '<record id>_<field>' with spaces
    replaced by underscores.
    NOTE(review): the replacement assumes whitespace in a record ID does not
    survive the round-trip through the aligner - confirm against runMuscle.

    Arguments:
      data : a DbData object with IgRecords to process.
      seq_fields : the sequence fields to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing IgRecords with multiple aligned sequence fields.
                                         valid is False when data.id is None (log is then None)
                                         or when runMuscle returns None.
    """
    def _aln_id(rec, field):
        # Single place that defines the ID used both to submit a sequence
        # and to find its aligned row again.
        return ('%s_%s' % (rec.id, field)).replace(' ', '_')

    # Define return object
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = True

    # Fail invalid groups
    if result.id is None:
        result.log = None
        result.valid = False
        return result

    records = data.data

    # Field-major order: all records for the first field, then the next field
    seq_list = [SeqRecord(r.getSeqField(f), id=_aln_id(r, f))
                for f in seq_fields
                for r in records]
    seq_aln = runMuscle(seq_list, aligner_exec=muscle_exec)
    if seq_aln is None:
        result.valid = False
        return result

    # Aligned rows are matched back to (record, field) pairs by ID, not by position
    aligned = {x.id: str(x.seq) for x in seq_aln}
    for r in records:
        for f in seq_fields:
            seq = aligned[_aln_id(r, f)]
            r.annotations['%s_ALIGN' % f] = seq
            result.log['%s-%s' % (f, r.id)] = seq

    return result
def alignWithin(data, field_map, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields within a row

    The input fields of the single record held in data.data are aligned against
    each other with one runMuscle call. Each aligned sequence is stored on the
    record under the output field name given by field_map.

    Arguments:
      data : DbData object with Receptor objects to process.
      field_map : a dictionary of {input sequence : output sequence} field names to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing Receptor objects with multiple aligned sequence fields.
    """
    # Build the result container; it is valid only when the group has an ID
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = result.id is not None

    # Invalid groups carry no log and are returned untouched
    if not result.valid:
        result.log = None
        return result

    receptor = data.data
    in_fields = list(field_map.keys())

    # One alignment entry per input field, identified by the field name
    to_align = [SeqRecord(receptor.getSeq(name), id=name) for name in in_fields]
    alignment = runMuscle(to_align, aligner_exec=muscle_exec)
    if alignment is None:
        result.valid = False
        return result

    # Match aligned rows back to fields by ID and record them
    aligned_by_field = {row.id: str(row.seq) for row in alignment}
    for name in in_fields:
        aligned = aligned_by_field[name]
        receptor.annotations[field_map[name]] = aligned
        result.log[name] = aligned

    return result
Exemple #4
0
def alignWithin(data, seq_fields, muscle_exec=default_muscle_exec):
    """
    Multiple aligns sequence fields within a row

    The requested fields of the single record held in data.data are aligned
    against each other with one runMuscle call. Each aligned sequence is stored
    on the record under the annotation '<field>_ALIGN'.

    Arguments:
      data : a DbData object with an IgRecords to process.
      seq_fields : the sequence fields to multiple align.
      muscle_exec : the MUSCLE executable.

    Returns:
      changeo.Multiprocessing.DbResult : object containing IgRecords with multiple aligned sequence fields.
    """
    # Build the result container; it is valid only when the group has an ID
    result = DbResult(data.id, data.data)
    result.results = data.data
    result.valid = result.id is not None

    # Invalid groups carry no log and are returned untouched
    if not result.valid:
        result.log = None
        return result

    row = data.data

    # One alignment entry per field, identified by the field name
    aln_input = [SeqRecord(row.getSeqField(name), id=name) for name in seq_fields]
    aln_output = runMuscle(aln_input, aligner_exec=muscle_exec)
    if aln_output is None:
        result.valid = False
        return result

    # Match aligned entries back to fields by ID and record them
    aligned_by_field = {entry.id: str(entry.seq) for entry in aln_output}
    for name in seq_fields:
        aligned = aligned_by_field[name]
        row.annotations['%s_ALIGN' % name] = aligned
        result.log[name] = aligned

    return result