Ejemplo n.º 1
0
def exercise_validate_sequence():
    """Regression check for ``validate_sequence`` on the lowercase alphabet.

    Each case calls ``validate_sequence`` with a different combination of
    protein / nucleic-acid flags and asserts the length of the returned
    collection (presumably the characters rejected under those flags --
    confirm against the iotbx implementation).
    """
    from iotbx.pdb.amino_acid_codes import validate_sequence

    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    # (keyword flags, expected length of the result), checked in order.
    cases = (
        (dict(protein=True, strict_protein=True, nucleic_acid=False), 3),
        (dict(protein=True, strict_protein=False, nucleic_acid=False), 1),
        (dict(nucleic_acid=True, strict_nucleic_acid=True, protein=False), 21),
        (dict(nucleic_acid=True, strict_nucleic_acid=False, protein=False), 10),
        (dict(protein=True, strict_protein=True,
              nucleic_acid=True, strict_nucleic_acid=True), 3),
    )

    for flags, expected_count in cases:
        result = validate_sequence(alphabet, **flags)
        assert len(result) == expected_count
Ejemplo n.º 2
0
def exercise_validate_sequence():
    """Regression check for ``validate_sequence`` on the lowercase alphabet.

    Runs ``validate_sequence`` under several protein / nucleic-acid flag
    combinations and asserts the length of what it returns (presumably the
    characters rejected under those flags -- confirm against the iotbx
    implementation).
    """
    from iotbx.pdb.amino_acid_codes import validate_sequence

    alphabet = "abcdefghijklmnopqrstuvwxyz"

    def result_size(**flags):
        # Length of whatever validate_sequence returns for the given flags.
        return len(validate_sequence(alphabet, **flags))

    # Protein-only validation, strict and relaxed.
    assert result_size(protein=True, strict_protein=True, nucleic_acid=False) == 6
    assert result_size(protein=True, strict_protein=False, nucleic_acid=False) == 2

    # Nucleic-acid-only validation, strict and relaxed.
    assert result_size(nucleic_acid=True, strict_nucleic_acid=True, protein=False) == 21
    assert result_size(nucleic_acid=True, strict_nucleic_acid=False, protein=False) == 10

    # Both alphabets enabled, both strict.
    assert result_size(
        protein=True,
        strict_protein=True,
        nucleic_acid=True,
        strict_nucleic_acid=True,
    ) == 5
Ejemplo n.º 3
0
def get_muscle_alignment_ordered(sequences, out = None):
  """
  Align sequences with get_muscle_alignment and return a clustal alignment
  whose rows follow the order of the sequences.

  Each distinct sequence object is submitted once under a generated name
  ("Chain_<i>", where i is the 1-based position of its first occurrence);
  the rows of the result are then looked up by that generated name and
  labelled with the sequence's own ``name`` attribute.

  Parameters
  ----------
  sequences : iterable of hashable objects exposing ``.name`` and ``.sequence``
  out : passed through unchanged to get_muscle_alignment

  Returns
  -------
  bioinformatics.clustal_alignment

  Raises
  ------
  Sorry
    For any non-empty error reported by get_muscle_alignment that does not
    mention 'Invalid character'.

  Notes
  -----
  When an 'Invalid character' error is reported, every sequence for which
  validate_sequence returns a non-empty result is dropped, and the output
  then covers only the remaining distinct sequences.
  """

  from iotbx import bioinformatics
  from iotbx.pdb.amino_acid_codes import validate_sequence

  # A repeated sequence keeps the name assigned at its first occurrence.
  name_for = {}

  for ( i, seq ) in enumerate( sequences, start = 1 ):
    name_for.setdefault( seq, "Chain_%d" % i )

  alignment, errors = get_muscle_alignment(
    fasta_sequences = "\n".join(
      str( bioinformatics.sequence( name = name, sequence = seq.sequence ) )
      for ( seq, name ) in name_for.items()
      ),
    out = out,
    )

  # check for errors and handle:
  #   invalid characters in sequences
  if (len(errors) > 0):
    for error in errors:
      error = error.strip()
      if ('Invalid character' in error):
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for seq in list(name_for):
          invalid = validate_sequence(
            seq.sequence, protein=True, strict_protein=False,
            nucleic_acid=True, strict_nucleic_acid=False)
          if (len(invalid) > 0):
            del name_for[seq]
        # Concrete list rather than a dict view, so the result below is
        # built from a stable sequence of the surviving entries.
        sequences = list(name_for)
      elif (len(error) > 0):
        raise Sorry(error)

  lookup = dict( zip( alignment.names, alignment.alignments ) )
  assert all( n in lookup for n in name_for.values() )

  return bioinformatics.clustal_alignment(
    names = [ seq.name for seq in sequences ],
    alignments = [ lookup[ name_for[ seq ] ] for seq in sequences ],
    program = alignment.program
    )