Python ReadSequencesFromFile Examples

Programming Language: Python

Namespace/Package Name: AuxiliaryFunctions

Examples at hotexamples.com: 7

Python ReadSequencesFromFile - 7 examples found. These are the top-rated real-world Python examples of AuxiliaryFunctions.ReadSequencesFromFile extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

ReadSequencesFromFile(5)

items(3)

keys(3)

values(3)

Example #1

Show file

def ReadReferenceFromFile(File):
  '''Read in all sequences in the reference file; check there is only one.

  File is handed to ReadSequencesFromFile with False as the second argument
  (NOTE(review): False looks like the "aligned" flag - confirm against
  ReadSequencesFromFile). That call must return a pair whose first element
  supports len() and .items().

  Returns the tuple (AllSequences.items(), ReferenceLength).

  If the number of sequences found is not exactly one, a message naming the
  file is printed to stderr and the program exits with status 1.
  '''
  AllSequences, ReferenceLength = ReadSequencesFromFile(File, False)
  if len(AllSequences) != 1:
    # Name the file that was actually read, i.e. the File argument; no other
    # file name is in scope inside this function.
    print('Found', len(AllSequences), 'sequences in', File + \
    '; expected 1.\nQuitting.', file=sys.stderr)
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist in every interpreter configuration.
    sys.exit(1)
  return AllSequences.items(), ReferenceLength

Example #2

Show file

File: CalculateEntropy.py Project: sdwfrost/SeqAnalTools

GapChars = ['-','.','?']

# Check this file is called from the command line with one argument
if len(sys.argv[1:]) != 1:
  print 'Incorrect number of arguments given.'
  print 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
  exit(1)
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'
  exit(1)

# Read in all the sequences as a dictionary
AllSequences, SequenceLength = ReadSequencesFromFile(DataFile)

SequenceNames = AllSequences.keys()
sequences     = AllSequences.values()
NumSequences  = len(AllSequences)

DataFromAllPositions = []
SetOfAllBasesEncountered = []

# For brevity
acgt = ['A','C','G','T']


AllExpectedBases = acgt + IUPACdict.keys() + GapChars

Example #3

Show file

File: FindSubSeqsInAlignment.py Project: SANBIHIV/shiver

        for GapChar in GapChars:
            if GapChar in PrimerList[i]:
                print('SubSeq', PrimerList[i], 'contains a gap. This is unexpected.'+\
                '\nQuitting.', file=sys.stderr)
                exit(1)
    CounterObject = collections.Counter(PrimerList)
    DuplicatedPrimers = [i for i in CounterObject if CounterObject[i] > 1]
    if len(DuplicatedPrimers) != 0:
        for DuplicatedPrimer in DuplicatedPrimers:
            print('SubSeq', DuplicatedPrimer, 'was specified twice with the same',\
            'option.', file=sys.stderr)
        print('Quitting.', file=sys.stderr)
        exit(1)

# Read in the sequences from the alignment file (into a dictionary)
SeqDict, AlignmentLength = ReadSequencesFromFile(AlignmentFile)

# Check the chosen reference is in the alignment: quit with an error on stderr
# if it is missing, otherwise keep its sequence for later use.
if ChosenRef not in SeqDict:
    print('Could not find',
          ChosenRef,
          'in',
          AlignmentFile + '.\nQuitting.',
          file=sys.stderr)
    exit(1)
ChosenRefSeq = SeqDict[ChosenRef]

# Define the set of unique primers, i.e. StartPrimers+EndPrimers but not
# double counting those that appear in both. Record their lengths in a dict.
AllUniquePrimers = StartPrimers + \
[primer for primer in EndPrimers if not primer in StartPrimers]

Example #4

Show file

File: ConstructBestRef.py Project: fazekasda/shiver

DuplicatedContigNames = [i for i in CounterObject if CounterObject[i]>1]
if len(DuplicatedContigNames) != 0:
  for ContigName in DuplicatedContigNames:
    print('Contig name', ContigName, 'was duplicated in the arguments.', \
    file=sys.stderr)
  print('All contig names should be unique. Exiting.', file=sys.stderr)
  exit(1)

# Check the consensus name does not match one of the contig names.
# (A ConsensusName of None means no consensus was specified, so the check is
# skipped.)
if ConsensusName is not None and ConsensusName in ContigNames:
  print('The consensus name should not be the same as one of the contig', \
  'names. Quitting.', file=sys.stderr)
  exit(1)

# Read in the sequences from the alignment file (into a dictionary)
AllSeqsDict, AlignmentLength = ReadSequencesFromFile(AlignmentFile)

# Check the consensus is found in the alignment; if so, keep its sequence.
# ConsensusSeq is only defined when a consensus name was given.
if ConsensusName is not None:
  if ConsensusName not in AllSeqsDict:
    print(ConsensusName, 'not found in', AlignmentFile + '. Quitting.', \
    file=sys.stderr)
    exit(1)
  ConsensusSeq = AllSeqsDict[ConsensusName]

# Separate sequences into references and contigs
RefDict = {}
ContigDict = {}
for SeqName in AllSeqsDict:
  if SeqName in ContigNames:
    ContigDict[SeqName] = AllSeqsDict[SeqName]

Example #5

Show file

File: TrivialAlign.py Project: sdwfrost/SeqAnalTools

# Check this file is called from the command line with one argument
if len(sys.argv[1:]) != 1:
  print 'Incorrect number of arguments given.'
  print 'Usage:\n', sys.argv[0], 'NameOfYourFastaFile.fasta'
  exit(1)
DataFile = sys.argv[1]

# Check that the argument exists and is a file
if not os.path.isfile(DataFile):
  print DataFile, 'does not exist or is not a file.'
  exit(1)

# Read in the sequences as a dictionary. They are (nominally) not aligned.
Aligned=False
SeqDict, FirstSeqLength = ReadSequencesFromFile(DataFile, Aligned)

# We are expecting two sequences.
if len(SeqDict) != 2:
  print 'Expected 2 sequences;', DataFile, 'contains', str(len(SeqDict)) +\
  '.\nQuitting.'
  exit(1)

SeqNames = SeqDict.keys()
Seqs     = SeqDict.values()

# If the two sequences are the same length, there is no shuffling to do. Print
# them as they are.
if len(Seqs[0]) == len(Seqs[1]):
  for SeqName, seq in SeqDict.items():
    print '>'+SeqName

Example #6

Show file

    'specified.', file=sys.stderr)
    exit(1)

# Rename arguments for brevity / clarity.
MainAlnFile = args.MainAlignmentFile
PairedAlnFile = args.PairedAlignmentFile
ExciseUniqueInsertionsOfRefInMainAlignment = args.excise

# Check that the arguments exist and are files
for InputFile in [MainAlnFile, PairedAlnFile]:
    if not os.path.isfile(InputFile):
        print(InputFile, 'does not exist or is not a file.', file=sys.stderr)
        exit(1)

# Read in the sequences from the main alignment file (into a dictionary)
MainAlnSeqDict, MainAlnSeqLength = ReadSequencesFromFile(MainAlnFile)
MainAlnSeqNames = MainAlnSeqDict.keys()
MainAlnSeqs = MainAlnSeqDict.values()

# Read in the sequences from the paired alignment file
PairedAlnSeqDict, PairedAlnSeqLength = ReadSequencesFromFile(PairedAlnFile)

# Check it has two sequences
if len(PairedAlnSeqDict) != 2:
    print('File', PairedAlnFile, 'contains', len(PairedAlnSeqDict),\
    'sequences; two were expected.\nQuitting.', file=sys.stderr)
    exit(1)
Seq1name, Seq2name = PairedAlnSeqDict.keys()

# Check that one of the sequences is in the main alignment file (the 'Ref')
# and one is not (the 'SeqToAdd').

Example #7

Show file

File: FindPrimersInAlignment.py Project: sdwfrost/SeqAnalTools

    for GapChar in GapChars:
      if GapChar in PrimerList[i]:
        print 'Primer', PrimerList[i], 'contains a gap. This is unexpected.'+\
        '\nQuitting.'
        exit(1)
  CounterObject = collections.Counter(PrimerList)
  DuplicatedPrimers = [i for i in CounterObject if CounterObject[i]>1]
  if len(DuplicatedPrimers) != 0:
    for DuplicatedPrimer in DuplicatedPrimers:
      print 'Primer', DuplicatedPrimer, 'was specified twice with the same',\
      'option.'
    print 'Quitting.'
    exit(1)

# Read in the sequences from the alignment file (into a dictionary)
SeqDict, AlignmentLength = ReadSequencesFromFile(AlignmentFile)

# Check the chosen reference is in the alignment
if not ChosenRef in SeqDict:
  print 'Could not find', ChosenRef, 'in', AlignmentFile+'.\nQuitting.'
  exit(1)
ChosenRefSeq = SeqDict[ChosenRef]

# Define the set of unique primers, i.e. StartPrimers+EndPrimers but not 
# double counting those that appear in both. Record their lengths in a dict.
AllUniquePrimers = StartPrimers + \
[primer for primer in EndPrimers if not primer in StartPrimers]
NumUniquePrimers = len(AllUniquePrimers)
PrimerLengths = {primer : len(primer) for primer in AllUniquePrimers} 

# Finds the position in the alignment, for each primer, after