def __init__(self, source):
    """Constructor.

    Argument : a filename or a list of strings that represent sequences.

    - mysequences = genepy.seqarray("from_genbank.gb") -- loads the
      sequences in from_genbank.gb through genepy.readalignment and
      remembers that path as self.filename.
    - mysequences = genepy.seqarray(seq_list), where seq_list is a list of
      strings ( such as ["ACTG", "AGTA", "TTGC"] ) wraps every item in a
      SeqRecord.SeqRecord and uses "genepy.fasta" as self.filename.

    Raises :
    -- IOError : source is a string but does not name an existing file.
    -- TypeError : source is neither a string nor a list.
    """
    # If we're reading in a sequence set from a file
    if isinstance(source, str):
        # Raise rather than print-and-continue: carrying on would reach
        # self.update() with self.seq and self.filename never assigned.
        if not os.path.isfile(source):
            raise IOError("%s not found, aborting." % source)
        self.seq = genepy.readalignment(source)
        self.filename = source

    # If we're fed a list
    elif isinstance(source, list):
        self.seq = [SeqRecord.SeqRecord(s) for s in source]
        self.filename = "genepy.fasta"

    else:
        raise TypeError("Expected a filename or a list of strings.")

    # Generate static members
    self.update()
def align(self, force=True, it=False, full=False, full_iter=False,
          auto=True, threads=False):
    """Align the array of sequences using ClustalO, then reload the result.

    Options :
    -- force : True / False; overwrite filename, if it exists
    -- it : False, integers > 0; iterate the guide tree
    -- full : True / False; use full distance matrix for guide-tree
       calculation
    -- full_iter : True / False; use full distance matrix during
       iteration only
    -- auto : True / False; automatically select options for speed and
       accuracy
    -- threads : False, integers > 0; limit the number of threads;
       False uses all

    The aligned sequences are read back from
    "<filename without extension>_aligned_genepy.phy" into self.seq.
    """
    # Hand the work to genepy.align. The positional order below is not the
    # same as this method's own signature order; it is kept as in the call
    # the file has always made.
    genepy.align(self.filename, force, threads, full, full_iter, it, auto)

    # Work out where the aligned output lives and load it.
    stem, _extension = os.path.splitext(self.filename)
    aligned_path = stem + "_aligned_genepy.phy"
    self.seq = genepy.readalignment(aligned_path)

    # Refresh static members so they describe the aligned sequences.
    self.update()