Beispiel #1
    def test_parsers_ignore(self):
        """LabeledRecordFinder should skip lines to ignore."""

        def never(line):
            """Always returns False; used below as an ignore predicate that rejects nothing."""
            return False

        def ignore_labels(line):
            """Returns True for empty, whitespace-only, or '#'-prefixed lines."""
            if not line:
                return True
            if line.isspace():
                return True
            return line.startswith("#")

        def is_start(line):
            """Returns True when line begins with the '>' marker."""
            marker = ">"
            return line.startswith(marker)

        lines = [">abc", "\n", "1", ">def", "#ignore", "2"]
            [[">abc", "1"], [">def", "#ignore", "2"]],
            list(LabeledRecordFinder(is_start, ignore=never)(lines)),
            [[">abc", "", "1"], [">def", "#ignore", "2"]],
            list(LabeledRecordFinder(is_start, ignore=ignore_labels)(lines)),
            [[">abc", "1"], [">def", "2"]],
Beispiel #2
    # separate by semicolons
    # get rid of leading/trailing spaces
    taxa = list(map(strip, taxonomy.split(";")))
    # delete trailing period if present
    last = taxa[-1]
    if last.endswith("."):
        taxa[-1] = last[:-1]
    return species, taxa

def is_feature_component_start(line):
    """Returns True if the first non-whitespace character of line is '/'."""
    without_indent = line.lstrip()
    return without_indent.startswith("/")

# Record finder that uses is_feature_component_start as its label predicate.
feature_component_iterator = LabeledRecordFinder(is_feature_component_start)

# Lookup tables keyed by annotation name; only key membership carries meaning
# here (the one stored value is None).
# NOTE(review): presumably consulted by parse_feature to decide how the lines
# of a multi-line annotation are joined -- confirm against its body.
_join_with_empty = {"translation": None}
_leave_as_lines = dict()

def parse_feature(lines):
    """Parses a feature. Doesn't handle subfeatures.

    Returns dict containing:
    'type': source, gene, CDS, etc.
    'location': unparsed location string
    ...then, key-value pairs for each annotation,
        e.g. '/gene="MNBH"' -> {'gene':['MNBH']} (i.e. quotes stripped)
    All relations are assumed 'to many', and order will be preserved.
Beispiel #3
def is_gde_label(x):
    """Checks if x looks like a GDE label line.

    A line counts as a label when it is non-empty and its first character is
    '%' or '#'.

    Always returns a real bool. Empty input ('' or None) yields False rather
    than the falsy input itself, matching the other line predicates in this
    module.
    """
    # bool(x) guards the x[0] lookup and keeps the return type a bool.
    return bool(x) and x[0] in "%#"

def is_blank_or_comment(x):
    """Returns True when x is empty, whitespace-only, or begins with '#'."""
    if not x:
        return True
    if x.startswith("#"):
        return True
    return x.isspace()

def is_blank(x):
    """Returns True when x is empty or consists only of whitespace."""
    if not x:
        return True
    return x.isspace()

# Record finder for FASTA-style input: is_fasta_label is the label predicate,
# and lines matching is_blank_or_comment are passed as the ones to ignore.
FastaFinder = LabeledRecordFinder(is_fasta_label, ignore=is_blank_or_comment)

def MinimalFastaParser(infile,
    """Yields successive sequences from infile as (label, seq) tuples.

    If strict is True (default), raises RecordError when label or seq missing.
        infile = open_(infile)
        close_at_end = True
    except (TypeError, AttributeError):
 def setUp(self):
     """Create a LabeledRecordFinder whose labels are lines starting with '>'."""

     def starts_with_gt(x):
         return x.startswith(">")

     self.FastaLike = LabeledRecordFinder(starts_with_gt)
Beispiel #5
    WARNING: Only maps the data type if the key is in label_constructors above.
    if not line.startswith("#"):
        raise ValueError("Labels must start with a # symbol.")

    if line.find(":") == -1:
        raise ValueError("Labels must contain a : symbol.")

    key, value = list(map(strip, line[1:].split(":", 1)))
    key = key.upper()
    if key in label_constructors:
        value = label_constructors[key](value)
    return key, value

# Record finders for BLAT / BLAST / PSI-BLAST output. Each passes a label
# predicate, constructor=strip, and a junk-line predicate as ignore.
# NOTE(review): constructor=strip is presumably applied to every line before
# it is stored -- confirm against LabeledRecordFinder.
BlatFinder = LabeledRecordFinder(query_finder, constructor=strip, ignore=is_blat_junk)

BlastFinder = LabeledRecordFinder(query_finder, constructor=strip, ignore=is_blast_junk)

PsiBlastFinder = LabeledRecordFinder(
    iter_finder, constructor=strip, ignore=is_blast_junk
)

PsiBlastQueryFinder = LabeledRecordFinder(
    iteration_set_finder, constructor=strip, ignore=is_blast_junk
)

def GenericBlastParser9(lines, finder, make_col_headers=False):
    """Yields successive records from lines (props, data list)
Beispiel #6
# Module metadata.
__version__ = "2019.10.24a"
__maintainer__ = "Rob Knight"
__email__ = "*****@*****.**"
__status__ = "Development"

# Module-level aliases for unbound str methods, so they can be handed around
# as plain callables (e.g. to map() or as a constructor argument).
maketrans = str.maketrans
strip = str.strip
rstrip = str.rstrip

def ll_start(line):
    """Returns True if line begins with '>>', the LocusLink record marker."""
    record_marker = ">>"
    return line.startswith(record_marker)

# Record finder that uses ll_start (lines beginning with '>>') as its label
# predicate.
LLFinder = LabeledRecordFinder(ll_start)

# Splitters for the delimiters used in LocusLink lines.
# NOTE(review): the second DelimitedSplitter argument is presumably a split
# limit (None = split on every occurrence, omitted = the class default) --
# confirm against DelimitedSplitter's definition.
pipes = DelimitedSplitter("|", None)
first_pipe = DelimitedSplitter("|")
commas = DelimitedSplitter(",", None)
first_colon = DelimitedSplitter(":", 1)

# Pairs the field names Accession, Gi, Strain with the pipe splitter; called
# on a raw line by _read_accession.
accession_wrapper = FieldWrapper(["Accession", "Gi", "Strain"], pipes)

def _read_accession(line):
    """Builds a MappedRecord from an accession line (Accession | Gi | Strain)."""
    wrapped_fields = accession_wrapper(line)
    return MappedRecord(wrapped_fields)

# Pairs the field names Description, Id, IdType, Printable with the pipe
# splitter.
# NOTE(review): presumably used to parse pipe-delimited lines the same way
# accession_wrapper is -- confirm against the reader that calls it.
rell_wrapper = FieldWrapper(["Description", "Id", "IdType", "Printable"], pipes)