# Check that the input path exists and is a regular file; quit otherwise.
if not os.path.isfile(DataFile):
    print DataFile, 'does not exist or is not a file.'
    exit(1)

# Read in the sequences as a dictionary. They are (nominally) not aligned.
# ReadSequencesFromFile returns a pair; the second element (FirstSeqLength) is
# not used in the visible part of this script.
Aligned=False
SeqDict, FirstSeqLength = ReadSequencesFromFile(DataFile, Aligned)

# We are expecting two sequences.
if len(SeqDict) != 2:
    print 'Expected 2 sequences;', DataFile, 'contains', str(len(SeqDict)) +\
    '.\nQuitting.'
    exit(1)

# NOTE: both of these are indexed by position below, which relies on keys() and
# values() returning lists (as they do under Python 2, which the print
# statements in this script already require).
SeqNames = SeqDict.keys()
Seqs = SeqDict.values()

# If the two sequences are the same length, there is no shuffling to do. Print
# them as they are (a '>' + name line followed by the sequence line) and exit
# successfully.
if len(Seqs[0]) == len(Seqs[1]):
    for SeqName, seq in SeqDict.items():
        print '>'+SeqName
        print seq
    exit(0)

# Find which of the two sequences is the shorter one. Equal lengths were
# handled above, so exactly one of the two is strictly shorter here.
if len(SeqDict[SeqNames[0]]) < len(SeqDict[SeqNames[1]]):
    ShorterSeqName, ShorterSeq, LongerSeqName, LongerSeq = \
    SeqNames[0], SeqDict[SeqNames[0]], SeqNames[1], SeqDict[SeqNames[1]]
else:
    ShorterSeqName, ShorterSeq, LongerSeqName, LongerSeq = \
# Validate the command line: exactly one argument (the fasta file) is expected.
# Diagnostics are written to stderr so they cannot be mistaken for regular
# output, and sys.exit() is used rather than the bare exit(), which is only a
# convenience name injected by the site module.
if len(sys.argv) != 2:
    print >> sys.stderr, 'Incorrect number of arguments given.'
    print >> sys.stderr, 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
    sys.exit(1)
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
    print >> sys.stderr, DataFile, 'does not exist or is not a file.'
    sys.exit(1)

# Read in all the sequences as a dictionary. ReadSequencesFromFile returns a
# pair: the dictionary and a sequence length.
AllSequences, SequenceLength = ReadSequencesFromFile(DataFile)
SequenceNames = AllSequences.keys()
sequences = AllSequences.values()
NumSequences = len(AllSequences)

# Accumulators, initially empty. Note that SetOfAllBasesEncountered is a list,
# despite its name.
DataFromAllPositions = []
SetOfAllBasesEncountered = []

# For brevity
acgt = ['A','C','G','T']
# list(...) makes the concatenation independent of whether keys() returns a
# list or a view; the resulting list is the same as before.
AllExpectedBases = acgt + list(IUPACdict.keys()) + GapChars
TotalEntropy = 0
# Rename arguments for brevity / clarity. MainAlnFile = args.MainAlignmentFile PairedAlnFile = args.PairedAlignmentFile ExciseUniqueInsertionsOfRefInMainAlignment = args.excise # Check that the arguments exist and are files for InputFile in [MainAlnFile, PairedAlnFile]: if not os.path.isfile(InputFile): print(InputFile, 'does not exist or is not a file.', file=sys.stderr) exit(1) # Read in the sequences from the main alignment file (into a dictionary) MainAlnSeqDict, MainAlnSeqLength = ReadSequencesFromFile(MainAlnFile) MainAlnSeqNames = MainAlnSeqDict.keys() MainAlnSeqs = MainAlnSeqDict.values() # Read in the sequences from the paired alignment file PairedAlnSeqDict, PairedAlnSeqLength = ReadSequencesFromFile(PairedAlnFile) # Check it has two sequences if len(PairedAlnSeqDict) != 2: print('File', PairedAlnFile, 'contains', len(PairedAlnSeqDict),\ 'sequences; two were expected.\nQuitting.', file=sys.stderr) exit(1) Seq1name, Seq2name = PairedAlnSeqDict.keys() # Check that one of the sequences is in the main alignment file (the 'Ref') # and one is not (the 'SeqToAdd'). if Seq1name in MainAlnSeqNames: if Seq2name in MainAlnSeqNames: