Exemple #1
0
 def test_allowed_chars_can_be_specified(self):
     """Characters outside a caller-supplied allowed_chars are reported.

     In both cases only 'A' (position 0) is missing from allowed_chars, so
     it is the single expected error and no sequence is returned.
     """
     expected = ('', '', [('A', 0)])
     cases = (
         ('ABCD', "BCD"),
         ('ABCD-DC', "-BCD"),
     )
     for fasta_text, allowed in cases:
         self.assertEqual(
             expected,
             extract_sequence_from_fasta(fasta_text, allowed_chars=allowed))
Exemple #2
0
 def test_a_real_sequence_should_parse_correctly(self):
     """A multi-line FASTA record yields its defline and joined sequence.

     The input wraps the sequence over several lines; the expected result
     is the defline, the sequence with the line breaks removed, and an
     empty error list.
     """
     expected_defline = '>sp|P30559|OXYR_HUMAN Oxytocin receptor OS=H**o sapiens GN=OXTR PE=1 SV=2'
     expected_sequence = 'MEGALAANWSAEAANASAAPPGAEGNRTAGPPRRNEALARVEVAVLCLILLLALSGNACVLLALRTTRQKHSRLFFFMKHLSIADLVVAVFQVLPQLLWDITFRFYGPDLLCRLVKYLQVVGMFASTYLLLLMSLDRCLAICQPLRSLRRRTDRLAVLATWLGCLVASAPQVHIFSLREVADGVFDCWAVFIQPWGPKAYITWITLAVYIVPVIVLAACYGLISFKIWQNLRLKTAAAAAAEAPEGAAAGDGGRVALARVSSVKLISKAKIRTVKMTFIIVLAFIVCWTPFFFVQMWSVWDANAPKEASAFIIVMLLASLNSCCNPWIYMLFTGHLFHELVQRFLCCSASYLKGRRLGETSASKKSNSSSFVLSHRSSSQRSCSQPSTA'
     expected = (expected_defline, expected_sequence, [])
     fasta_text = '>sp|P30559|OXYR_HUMAN Oxytocin receptor OS=H**o sapiens GN=OXTR PE=1 SV=2\nMEGALAANWSAEAANASAAPPGAEGNRTAGPPRRNEALARVEVAVLCLILLLALSGNACVLLALRTTRQKHSRLFFFMKH\nLSIADLVVAVFQVLPQLLWDITFRFYGPDLLCRLVKYLQVVGMFASTYLLLLMSLDRCLAICQPLRSLRRRTDRLAVLAT\nWLGCLVASAPQVHIFSLREVADGVFDCWAVFIQPWGPKAYITWITLAVYIVPVIVLAACYGLISFKIWQNLRLKTAAAAA\nAEAPEGAAAGDGGRVALARVSSVKLISKAKIRTVKMTFIIVLAFIVCWTPFFFVQMWSVWDANAPKEASAFIIVMLLASL\nNSCCNPWIYMLFTGHLFHELVQRFLCCSASYLKGRRLGETSASKKSNSSSFVLSHRSSSQRSCSQPSTA'
     self.assertEqual(expected, extract_sequence_from_fasta(fasta_text))
 def clean_input(self):
     """Validate the submitted 'input' field and return it unchanged.

     The raw text is passed through extract_sequence_from_fasta, which
     yields (defline, sequence, errors).

     Returns -- the original field value when a sequence was extracted and
     it is not made up solely of A/C/T/G.

     Raises -- forms.ValidationError when:
       * the extracted sequence consists only of A, C, T and G
         (case-insensitive), i.e. it looks like a nucleotide sequence;
       * no sequence was extracted and no errors were reported;
       * illegal characters were reported; if one of them is '>', the
         message also hints that several sequences may have been pasted.
     """
     fasta_text = self.cleaned_data['input']
     _defline, sequence, errors = extract_sequence_from_fasta(fasta_text)

     if sequence:
         residues = set(char.upper() for char in sequence)
         if residues.issubset(set('ACTG')):
             raise forms.ValidationError('''Error: You appear to have submitted a nucleotide 
                 sequence. Only amino acid sequences are accepted.''')
         return fasta_text

     if not errors:
         raise forms.ValidationError("You did not submit a sequence")

     # Each error is indexed as (character, position); positions are shown
     # to the user 1-based.
     error_list = ', '.join(
         """position %d,('%s')""" % (error[1] + 1, error[0])
         for error in errors)
     if ">" in error_list:
         raise forms.ValidationError('''Error: illegal characters were found at: %s
                 . \nYou may have submitted more than one sequence.''' % error_list)
     raise forms.ValidationError("Error: illegal characters were found at: %s" % error_list)
def get_uniprot_id_from_fasta(fasta_seq):
    """Retrieve UniProt identifier from a FASTA sequence

    The input is split with extract_sequence_from_fasta, re-assembled as
    "defline\\nsequence", parsed with SeqIO, and the SEGUID of the parsed
    sequence is used to look up UniProt rows.

    Returns -- tuple (uniprot identifier, record)
        (None, None)    if extract_sequence_from_fasta reported errors
        (None, record)  if no UniProt row has a matching seguid
        otherwise the identifier of, in order of preference:
          1. a row whose uniprot_identifier occurs in the record id,
          2. a row whose taxon scientific_name occurs in the record
             description,
          3. the first matching row.

    """
    # TODO: assume properly formatted sequence; don't do validation here
    (defline, sequence, errors) = extract_sequence_from_fasta(fasta_seq)
    if errors:
        return (None, None)
    proper_seq = '\n'.join([defline, sequence.encode('ascii')])
    handle = StringIO.StringIO(proper_seq)
    # Builtin next() instead of the Python 2-only iterator method .next().
    # NOTE(review): raises StopIteration if the parser yields no record
    # (possibly when the input had no defline) -- confirm against callers.
    record = next(SeqIO.parse(handle, "fasta"))
    record_id = record.id
    header = record.description
    aa_seguid = seguid(record.seq)

    matches = UniProt.objects.filter(seguid=aa_seguid).all()

    if not matches:
        return (None, record)

    for entry in matches:
        uniprot_identifier = entry.uniprot_identifier
        # Is this a holdover from the chained headers used previously?
        if uniprot_identifier and (uniprot_identifier in record_id):
            return (uniprot_identifier, record)

    for entry in matches:
        taxon = entry.taxon
        if not taxon:
            continue
        scientific_name = getattr(taxon, 'scientific_name', False)
        if scientific_name and scientific_name in header:
            return (entry.uniprot_identifier, record)

    # Nothing in the header disambiguates the candidates: take the first.
    return (matches[0].uniprot_identifier, record)
Exemple #5
0
 def test_a_sequence_may_have_at_most_one_defline(self):
     """A second '>' line after the defline is reported as an error.

     The 'o' characters of the stray "foo" line are reported as well, and
     no sequence is returned.
     """
     expected = ('>', '', [('>', 0), ('o', 2), ('o', 3)])
     actual = extract_sequence_from_fasta(">\n>\n\nfoo\nA")
     self.assertEqual(expected, actual)
Exemple #6
0
 def test_a_sequence_may_contain_numbers(self):
     """Digits and spaces in the input are dropped without any error."""
     expected = ('', 'ACCANMA', [])
     actual = extract_sequence_from_fasta("ACCANM 7 A ")
     self.assertEqual(expected, actual)
Exemple #7
0
 def test_a_defline_can_begin_with_multiple_gt_signs(self):
     """A defline starting with '>>' is returned verbatim, with no errors."""
     expected = ('>>foo', 'ACCMNT', [])
     actual = extract_sequence_from_fasta(">>foo\nACCMNT")
     self.assertEqual(expected, actual)
Exemple #8
0
 def test_a_defline_can_contain_multiple_gt_signs(self):
     """A '>' inside the defline text is kept and is not an error."""
     expected = ('>foo>bar', 'ACCMNT', [])
     actual = extract_sequence_from_fasta(">foo>bar\nACCMNT")
     self.assertEqual(expected, actual)
Exemple #9
0
 def test_a_sequence_cannot_just_have_mutliple_deflines(self):
     """Two deflines and no residues: the second '>' is the only error."""
     expected = ('>defline1', '', [('>', 0)])
     actual = extract_sequence_from_fasta(">defline1\n>defline2\n")
     self.assertEqual(expected, actual)
Exemple #10
0
 def test_a_sequence_cannot_begin_with_a_gt(self):
     """A '>' at the start of the sequence body is reported at position 0."""
     expected = ('>', '', [('>', 0)])
     actual = extract_sequence_from_fasta(">\n>A")
     self.assertEqual(expected, actual)
Exemple #11
0
 def test_a_mostly_empty_defline_is_OK(self):
     """A defline consisting of just '>' is accepted."""
     expected = ('>', "ACLACTR", [])
     actual = extract_sequence_from_fasta(">\nACLACTR")
     self.assertEqual(expected, actual)
Exemple #12
0
 def test_a_sequence_with_blacklist_chars(self):
     """'B' and 'J' are rejected, each reported with its position."""
     expected = ('', '', [("B", 1), ("J", 2)])
     actual = extract_sequence_from_fasta("ABJA")
     self.assertEqual(expected, actual)
Exemple #13
0
 def test_a_sequence_may_have_an_asterisk_only_at_the_end(self):
     """An asterisk followed by more residues is reported as an error."""
     expected = ('', '', [('*', 7)])
     actual = extract_sequence_from_fasta("ACLACTR*AC")
     self.assertEqual(expected, actual)
Exemple #14
0
 def test_an_input_is_required(self):
     """Empty input gives an empty defline, sequence and error list."""
     expected = ('', '', [])
     actual = extract_sequence_from_fasta('')
     self.assertEqual(expected, actual)
Exemple #15
0
 def test_a_sequence_may_end_with_an_asterisk(self):
     """A trailing asterisk is stripped from the returned sequence."""
     expected = ('', "ACLACTR", [])
     actual = extract_sequence_from_fasta("ACLACTR*")
     self.assertEqual(expected, actual)
Exemple #16
0
 def test_a_sequence_may_not_contain_non_alphanumeric_chars(self):
     """A '?' inside the sequence is reported with its position."""
     expected = ('', '', [('?', 3)])
     actual = extract_sequence_from_fasta("ACL?CA")
     self.assertEqual(expected, actual)
Exemple #17
0
 def test_a_sequence_may_contain_newlines_tabs_and_spaces(self):
     """Newlines, spaces and tabs are removed from the returned sequence."""
     expected = ('', "ACLACTR", [])
     actual = extract_sequence_from_fasta("ACL\nA CT\tR")
     self.assertEqual(expected, actual)
Exemple #18
0
 def test_an_input_cannot_simply_be_whitespace(self):
     """Whitespace-only input gives the same all-empty result as no input."""
     expected = ('', '', [])
     actual = extract_sequence_from_fasta('    ')
     self.assertEqual(expected, actual)
Exemple #19
0
 def test_a_sequence_cannot_just_have_a_defline(self):
     """A defline with nothing after it yields an empty sequence, no errors."""
     cases = (
         (('>', '', []), ">\n"),
         (('>some text here', '', []), ">some text here\n"),
     )
     for expected, fasta_text in cases:
         self.assertEqual(expected, extract_sequence_from_fasta(fasta_text))
Exemple #20
0
 def test_a_sequence_does_not_require_a_defline(self):
     """The same sequence parses with or without a leading defline."""
     cases = (
         (('', "ACLACTR", []), "ACLACTR"),
         (('>foo', "ACLACTR", []), ">foo\nACLACTR"),
     )
     for expected, fasta_text in cases:
         self.assertEqual(expected, extract_sequence_from_fasta(fasta_text))
Exemple #21
0
 def test_the_case_of_a_sequence_does_not_matter(self):
     """Lower-case residues are accepted and returned in their original case."""
     expected = ('', "acca", [])
     actual = extract_sequence_from_fasta("acca")
     self.assertEqual(expected, actual)