Example #1
0
# Input validation and loading for the two-sequence file named by DataFile
# (DataFile is assigned outside this excerpt). This section is Python 2 code:
# it uses print statements and indexes the result of dict.values().

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'
  # A non-zero exit status reports the failure to the calling process.
  exit(1)

# Read in the sequences as a dictionary. They are (nominally) not aligned.
# Aligned is passed as the second argument to ReadSequencesFromFile; what the
# flag changes inside that helper is not visible here.
Aligned=False
# The helper returns a pair. SeqDict is used below as a mapping from sequence
# name to sequence; FirstSeqLength is not used in the visible part of the file.
SeqDict, FirstSeqLength = ReadSequencesFromFile(DataFile, Aligned)

# We are expecting two sequences.
if len(SeqDict) != 2:
  print 'Expected 2 sequences;', DataFile, 'contains', str(len(SeqDict)) +\
  '.\nQuitting.'
  exit(1)

# keys() and values() are taken from the same unmodified dictionary, so
# SeqNames[i] and Seqs[i] refer to the same entry. Seqs is indexed below, which
# requires values() to return a list (true in Python 2).
SeqNames = SeqDict.keys()
Seqs     = SeqDict.values()

# If the two sequences are the same length, there is no shuffling to do. Print
# them as they are.
if len(Seqs[0]) == len(Seqs[1]):
  # Each entry is written as a '>'-prefixed name line followed by a line
  # holding the sequence itself.
  for SeqName, seq in SeqDict.items():
    print '>'+SeqName
    print seq
  # Nothing further to do: stop here with a success status.
  exit(0)

# Find which of the two sequences is the shorter one
if len(SeqDict[SeqNames[0]]) < len(SeqDict[SeqNames[1]]):
  ShorterSeqName, ShorterSeq, LongerSeqName, LongerSeq = \
  SeqNames[0], SeqDict[SeqNames[0]], SeqNames[1], SeqDict[SeqNames[1]]
else:
# Command-line handling, input loading and initialisation of the accumulators
# used later in the script. This section is Python 2 code: it uses print
# statements and concatenates the result of dict.keys() with lists.

# Check this file is called from the command line with one argument
if len(sys.argv[1:]) != 1:
  print 'Incorrect number of arguments given.'
  print 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
  # A non-zero exit status reports the failure to the calling process.
  exit(1)
# The single positional argument is the path of the input file.
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'
  exit(1)

# Read in all the sequences as a dictionary
# The helper returns a pair: the dictionary and a length value.
# NOTE(review): SequenceLength is presumably the common length of the
# sequences - confirm against ReadSequencesFromFile.
AllSequences, SequenceLength = ReadSequencesFromFile(DataFile)

# keys() and values() are taken from the same unmodified dictionary, so the two
# results are in corresponding order.
SequenceNames = AllSequences.keys()
sequences     = AllSequences.values()
NumSequences  = len(AllSequences)

# Both start empty and are not filled in within this excerpt.
DataFromAllPositions = []
# Despite its name, this is initialised as a list, not a set.
SetOfAllBasesEncountered = []

# For brevity
acgt = ['A','C','G','T']


# One list made of the four bases above, the keys of IUPACdict and GapChars.
# IUPACdict and GapChars are defined outside this excerpt; the '+' requires
# IUPACdict.keys() to return a list (true in Python 2) and GapChars to be a
# list as well.
AllExpectedBases = acgt + IUPACdict.keys() + GapChars



# Starts at zero; not updated within this excerpt.
# NOTE(review): presumably a running sum over alignment positions - confirm.
TotalEntropy = 0    
Example #3
0
    exit(1)

# Give the parsed command-line options shorter, clearer names.
MainAlnFile = args.MainAlignmentFile
PairedAlnFile = args.PairedAlignmentFile
ExciseUniqueInsertionsOfRefInMainAlignment = args.excise

# Both inputs must be existing files; stop at the first one that is not,
# reporting it on stderr and exiting with a failure status.
for InputFile in (MainAlnFile, PairedAlnFile):
    if os.path.isfile(InputFile):
        continue
    print(InputFile, 'does not exist or is not a file.', file=sys.stderr)
    exit(1)

# Load the main alignment into a dictionary and keep handles on its names and
# sequences.
MainAlnSeqDict, MainAlnSeqLength = ReadSequencesFromFile(MainAlnFile)
MainAlnSeqNames = MainAlnSeqDict.keys()
MainAlnSeqs = MainAlnSeqDict.values()

# Load the paired alignment the same way.
PairedAlnSeqDict, PairedAlnSeqLength = ReadSequencesFromFile(PairedAlnFile)

# The paired alignment must hold exactly two sequences; when it does, unpack
# their names, otherwise report the actual count on stderr and quit.
if len(PairedAlnSeqDict) == 2:
    Seq1name, Seq2name = PairedAlnSeqDict.keys()
else:
    print(
        'File',
        PairedAlnFile,
        'contains',
        len(PairedAlnSeqDict),
        'sequences; two were expected.\nQuitting.',
        file=sys.stderr
    )
    exit(1)

# Check that one of the sequences is in the main alignment file (the 'Ref')
# and one is not (the 'SeqToAdd').
if Seq1name in MainAlnSeqNames: