Ejemplo n.º 1
def ReadReferenceFromFile(File):
  '''Read the reference file and check that it holds exactly one sequence.

  Args:
    File: the reference file, passed straight to ReadSequencesFromFile.

  Returns:
    A tuple (AllSequences.items(), ReferenceLength), where AllSequences and
    ReferenceLength are the two values returned by
    ReadSequencesFromFile(File, False).

  Exits with status 1, after reporting the problem on stderr, if the number
  of sequences read is not exactly one.
  '''
  AllSequences, ReferenceLength = ReadSequencesFromFile(File, False)
  if len(AllSequences) != 1:
    # Name the file that was actually read (the File argument) instead of
    # depending on a variable that may or may not exist in an outer scope.
    print('Found', len(AllSequences), 'sequences in', str(File) +
          '; expected 1.\nQuitting.', file=sys.stderr)
    # The message announces that we quit, so do so rather than returning an
    # unexpected number of sequences to the caller.
    sys.exit(1)
  return AllSequences.items(), ReferenceLength
Ejemplo n.º 2
GapChars = ['-','.','?']

# Check this file is called from the command line with one argument
if len(sys.argv[1:]) != 1:
  print 'Incorrect number of arguments given.'
  print 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'

# Read in all the sequences as a dictionary
AllSequences, SequenceLength = ReadSequencesFromFile(DataFile)

SequenceNames = AllSequences.keys()
sequences     = AllSequences.values()
NumSequences  = len(AllSequences)

DataFromAllPositions = []
SetOfAllBasesEncountered = []

# For brevity
acgt = ['A','C','G','T']

AllExpectedBases = acgt + IUPACdict.keys() + GapChars

Ejemplo n.º 3
# Check this file is called from the command line with one argument
if len(sys.argv[1:]) != 1:
  print 'Incorrect number of arguments given.'
  print 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'

# Read in the sequences as a dictionary. They are (nominally) not aligned.
SeqDict, FirstSeqLength = ReadSequencesFromFile(DataFile, Aligned)

# We are expecting two sequences.
if len(SeqDict) != 2:
  print 'Expected 2 sequences;', DataFile, 'contains', str(len(SeqDict)) +\

SeqNames = SeqDict.keys()
Seqs     = SeqDict.values()

# If the two sequences are the same length, there is no shuffling to do. Print
# them as they are.
if len(Seqs[0]) == len(Seqs[1]):
  for SeqName, seq in SeqDict.items():
    print '>'+SeqName
Ejemplo n.º 4
        for GapChar in GapChars:
            if GapChar in PrimerList[i]:
                print('SubSeq', PrimerList[i], 'contains a gap. This is unexpected.'+\
                '\nQuitting.', file=sys.stderr)
    CounterObject = collections.Counter(PrimerList)
    DuplicatedPrimers = [i for i in CounterObject if CounterObject[i] > 1]
    if len(DuplicatedPrimers) != 0:
        for DuplicatedPrimer in DuplicatedPrimers:
            print('SubSeq', DuplicatedPrimer, 'was specified twice with the same',\
            'option.', file=sys.stderr)
        print('Quitting.', file=sys.stderr)

# Read in the sequences from the alignment file (into a dictionary)
SeqDict, AlignmentLength = ReadSequencesFromFile(AlignmentFile)

# Check the chosen reference is in the alignment. Stop if it is not: the
# lookup SeqDict[ChosenRef] below would otherwise raise a KeyError.
if ChosenRef not in SeqDict:
    print('Could not find', ChosenRef, 'in',
          AlignmentFile + '.\nQuitting.', file=sys.stderr)
    sys.exit(1)
ChosenRefSeq = SeqDict[ChosenRef]

# Build the collection of unique primers: every start primer, followed by each
# end primer that is not also a start primer, so that primers appearing in
# both are counted once. Record their lengths in a dict.
AllUniquePrimers = StartPrimers + [
    primer for primer in EndPrimers if primer not in StartPrimers
]
Ejemplo n.º 5
    'specified.', file=sys.stderr)

# Rename arguments for brevity / clarity.
MainAlnFile = args.MainAlignmentFile
PairedAlnFile = args.PairedAlignmentFile
ExciseUniqueInsertionsOfRefInMainAlignment = args.excise

# Check that the arguments exist and are files. Stop at the first one that
# does not, since both files are read below.
for InputFile in [MainAlnFile, PairedAlnFile]:
    if not os.path.isfile(InputFile):
        print(InputFile, 'does not exist or is not a file.', file=sys.stderr)
        sys.exit(1)

# Read in the sequences from the main alignment file (into a dictionary)
MainAlnSeqDict, MainAlnSeqLength = ReadSequencesFromFile(MainAlnFile)
MainAlnSeqNames = MainAlnSeqDict.keys()
MainAlnSeqs = MainAlnSeqDict.values()

# Read in the sequences from the paired alignment file
PairedAlnSeqDict, PairedAlnSeqLength = ReadSequencesFromFile(PairedAlnFile)

# Check it has two sequences. Stop if not: the two-name unpacking below would
# otherwise fail with a ValueError right after the message is printed.
if len(PairedAlnSeqDict) != 2:
    print('File', PairedAlnFile, 'contains', len(PairedAlnSeqDict),
          'sequences; two were expected.\nQuitting.', file=sys.stderr)
    sys.exit(1)
Seq1name, Seq2name = PairedAlnSeqDict.keys()

# Check that one of the sequences is in the main alignment file (the 'Ref')
# and one is not (the 'SeqToAdd').