def LoadSeqs(filename=None, format=None, data=None, moltype=None, name=None,
             aligned=True, label_to_name=None, parser_kw=None,
             constructor_kw=None, **kw):
    """Initialize an alignment or collection of sequences.

    Exactly one of ``filename`` and ``data`` must be supplied.

    Arguments:
        - filename: name of the sequence file
        - format: format of the sequence file (only valid with filename)
        - data: optional explicit provision of sequences
        - moltype: the MolType, eg DNA, PROTEIN
        - name: passed to the constructor as its ``Name`` argument
        - aligned: set True if sequences are already aligned and have the
          same length, results in an Alignment object. If False, a
          SequenceCollection instance is returned instead. If callable,
          will use as a constructor (e.g. can pass in DenseAlignment or
          CodonAlignment).
        - label_to_name: function for converting original name into another
          name. Default behavior is to preserve the original FASTA label and
          comment.
          To remove all FASTA label comments, and pass in only the label,
          pass in:
                label_to_name=lambda x: x.split()[0]
          To look up names in a dict, pass in:
                label_to_name = lambda x: d.get(x, default_name)
          ...where d is a dict that's in scope, and default_name is what you
          want to assign any sequence that isn't in the dict.
        - parser_kw: optional dict of keyword arguments forwarded to
          FromFilenameParser when reading ``filename``
        - constructor_kw: optional dict of keyword arguments forwarded to
          whichever constructor builds the result
        - **kw: must be empty when ``data`` is supplied; when ``filename``
          is supplied these are accepted but not used here

    If format is None, will attempt to infer format from the filename
    suffix.

    If label_to_name is None, will attempt to infer correct conversion from
    the format.

    Raises AssertionError if the filename/data/format/kw combination is
    inconsistent.
    """
    # None sentinels instead of shared mutable ``{}`` defaults.
    if parser_kw is None:
        parser_kw = {}
    if constructor_kw is None:
        constructor_kw = {}

    # Validation is raised explicitly (not via ``assert``) so it still runs
    # under ``python -O``; AssertionError is kept so existing callers that
    # catch it continue to work.
    if filename is None:
        if data is None:
            raise AssertionError("either filename or data must be provided")
        if format is not None:
            raise AssertionError(
                "format is only meaningful together with filename")
        if kw:
            raise AssertionError(
                "unexpected keyword arguments: %r" % (kw,))
    else:
        if data is not None:
            raise AssertionError((filename, data))
        data = list(FromFilenameParser(filename, format, **parser_kw))

    # the following is a temp hack until we have the load API sorted out.
    if aligned:
        # A truthy callable is used as the constructor itself; any other
        # truthy value selects the default Alignment class.
        constructor = aligned if callable(aligned) else Alignment
        return constructor(data=data, MolType=moltype, Name=name,
                           label_to_name=label_to_name, **constructor_kw)

    # generic case: return SequenceCollection
    return SequenceCollection(data, MolType=moltype, Name=name,
                              label_to_name=label_to_name, **constructor_kw)
def Sequence(moltype=None, seq=None, name=None, filename=None, format=None):
    """Return a single sequence object, reading it from a file if needed.

    Arguments:
        - moltype: if given, its makeSequence method builds the result
        - seq: the sequence data; when None it is read from filename
        - name: assigned to the result's Name attribute when not None; when
          loading from a file and name is None, the parsed record's name is
          used
        - filename: file handed to FromFilenameParser when seq is None
        - format: format argument handed to FromFilenameParser

    When moltype is None, a seq that already has a MolType attribute is
    returned as-is (apart from Name); otherwise ASCII.makeSequence is used.

    Raises ValueError if the file yields more than one record.
    """
    if seq is None:
        for label, parsed in FromFilenameParser(filename, format):
            # A second record means the file does not hold exactly one
            # sequence.
            if seq is not None:
                raise ValueError("Multiple sequences in '%s'" % filename)
            seq = parsed
            if name is None:
                name = label

    if moltype is None:
        if not hasattr(seq, 'MolType'):
            seq = ASCII.makeSequence(seq)
    else:
        seq = moltype.makeSequence(seq)

    if name is not None:
        seq.Name = name
    return seq