Ejemplo n.º 1
0
    def test_parsers_ignore(self):
        """LabeledRecordFinder should drop lines matched by its ignore predicate."""

        def keep_everything(line):
            # Ignore predicate that never rejects a line.
            return False

        def blank_or_comment(line):
            # Ignore predicate: empty, whitespace-only, or '#'-prefixed lines.
            return (not line) or line.isspace() or line.startswith("#")

        def starts_record(line):
            # A '>' prefix opens a new record.
            return line.startswith(">")

        lines = [">abc", "\n", "1", ">def", "#ignore", "2"]

        # Each case pairs the extra keyword arguments for the finder with the
        # records expected from ``lines``.
        cases = [
            # Default ignore: the blank line is dropped, '#ignore' is kept.
            ({}, [[">abc", "1"], [">def", "#ignore", "2"]]),
            # Nothing ignored: the blank line shows up as ''.
            (
                {"ignore": keep_everything},
                [[">abc", "", "1"], [">def", "#ignore", "2"]],
            ),
            # Blank and comment lines are both dropped.
            ({"ignore": blank_or_comment}, [[">abc", "1"], [">def", "2"]]),
        ]
        for extra_kwargs, expected in cases:
            finder = LabeledRecordFinder(starts_record, **extra_kwargs)
            self.assertEqual(list(finder(lines)), expected)
Ejemplo n.º 2
0
    # separate by semicolons
    # get rid of leading/trailing spaces
    taxa = list(map(strip, taxonomy.split(";")))
    # delete trailing period if present
    last = taxa[-1]
    if last.endswith("."):
        taxa[-1] = last[:-1]
    return species, taxa


def is_feature_component_start(line):
    """Return True if the first non-whitespace character of line is '/'."""
    content = line.lstrip()
    return content.startswith("/")


# Record finder built on is_feature_component_start, i.e. keyed on lines
# whose first non-whitespace character is '/'.
feature_component_iterator = LabeledRecordFinder(is_feature_component_start)

# Dict whose only key is "translation" (value None).
# NOTE(review): presumably used as a set of annotation names whose lines are
# joined without a separator -- confirm against parse_feature.
_join_with_empty = dict.fromkeys(["translation"])
# Empty dict. NOTE(review): presumably the set of annotation names whose
# lines are kept unjoined -- confirm against parse_feature.
_leave_as_lines = {}


def parse_feature(lines):
    """Parses a feature. Doesn't handle subfeatures.

    Returns dict containing:
    'type': source, gene, CDS, etc.
    'location': unparsed location string
    ...then, key-value pairs for each annotation,
        e.g. '/gene="MNBH"' -> {'gene':['MNBH']} (i.e. quotes stripped)
    All relations are assumed 'to many', and order will be preserved.
    """
Ejemplo n.º 3
0
def is_gde_label(x):
    """Checks if x looks like a GDE label line.

    A GDE label line is a non-empty line whose first character is '%' or '#'.

    Always returns a real bool: empty (or None) input yields False rather
    than the falsy input object itself.
    """
    # bool(x) guards the x[0] lookup and keeps the result a strict boolean.
    return bool(x) and x[0] in "%#"


def is_blank_or_comment(x):
    """Return True if x is empty, whitespace-only, or begins with '#'."""
    if not x:
        return True
    if x.startswith("#"):
        return True
    return x.isspace()


def is_blank(x):
    """Return True if x is empty or consists solely of whitespace."""
    if x:
        return x.isspace()
    return True


# Default record finder for FASTA input: is_fasta_label is the record-start
# predicate, and lines for which is_blank_or_comment is true (empty,
# whitespace-only, or '#'-prefixed) are passed as the ones to ignore.
FastaFinder = LabeledRecordFinder(is_fasta_label, ignore=is_blank_or_comment)


def MinimalFastaParser(infile,
                       strict=True,
                       label_to_name=str,
                       finder=FastaFinder,
                       label_characters=">"):
    """Yields successive sequences from infile as (label, seq) tuples.

    If strict is True (default), raises RecordError when label or seq missing.
    """
    try:
        infile = open_(infile)
        close_at_end = True
    except (TypeError, AttributeError):
Ejemplo n.º 4
0
 def setUp(self):
     """Create the FASTA-like LabeledRecordFinder used by the tests."""

     def starts_with_gt(line):
         # A '>' prefix opens a new record.
         return line.startswith(">")

     self.FastaLike = LabeledRecordFinder(starts_with_gt)
Ejemplo n.º 5
0
    WARNING: Only maps the data type if the key is in label_constructors above.
    """
    if not line.startswith("#"):
        raise ValueError("Labels must start with a # symbol.")

    if line.find(":") == -1:
        raise ValueError("Labels must contain a : symbol.")

    key, value = list(map(strip, line[1:].split(":", 1)))
    key = key.upper()
    if key in label_constructors:
        value = label_constructors[key](value)
    return key, value


# Module-level record finders for BLAT/BLAST output. Each one passes a
# record-start predicate, `strip` as the per-line constructor, and a "junk"
# predicate as the ignore filter.
# NOTE(review): the predicates (query_finder, iter_finder,
# iteration_set_finder, is_blat_junk, is_blast_junk) are defined outside this
# excerpt -- confirm their exact matching rules there.

# BLAT: one record per query; BLAT junk lines ignored.
BlatFinder = LabeledRecordFinder(query_finder, constructor=strip, ignore=is_blat_junk)

# BLAST: same record-start predicate as BlatFinder, BLAST junk lines ignored.
BlastFinder = LabeledRecordFinder(query_finder, constructor=strip, ignore=is_blast_junk)

# PSI-BLAST: records delimited by iter_finder.
PsiBlastFinder = LabeledRecordFinder(
    iter_finder, constructor=strip, ignore=is_blast_junk
)

# PSI-BLAST: records delimited by iteration_set_finder.
PsiBlastQueryFinder = LabeledRecordFinder(
    iteration_set_finder, constructor=strip, ignore=is_blast_junk
)


def GenericBlastParser9(lines, finder, make_col_headers=False):
    """Yields successive records from lines (props, data list)
Ejemplo n.º 6
0
# Module metadata.
__version__ = "2019.10.24a"
__maintainer__ = "Rob Knight"
__email__ = "*****@*****.**"
__status__ = "Development"

# Module-level aliases for str methods so they can be passed around as plain
# callables (e.g. to map() or as a constructor argument).
maketrans = str.maketrans
strip = str.strip
rstrip = str.rstrip


def ll_start(line):
    """Return True if line begins with '>>', the LocusLink record marker."""
    record_marker = ">>"
    return line.startswith(record_marker)


# Record finder for LocusLink input, keyed on lines that start with '>>'.
LLFinder = LabeledRecordFinder(ll_start)

# Reusable field splitters for the delimiters '|', ',' and ':'.
# NOTE(review): the meaning of DelimitedSplitter's second argument (None, 1,
# or omitted) is defined outside this excerpt -- confirm before relying on
# how many pieces each splitter produces.
pipes = DelimitedSplitter("|", None)
first_pipe = DelimitedSplitter("|")
commas = DelimitedSplitter(",", None)
first_colon = DelimitedSplitter(":", 1)

# Wrapper pairing the field names Accession, Gi and Strain with the `pipes`
# splitter.
accession_wrapper = FieldWrapper(["Accession", "Gi", "Strain"], pipes)


def _read_accession(line):
    """Parse an accession line of the form 'Accession | Gi | Strain'.

    The line is passed through accession_wrapper and the result is wrapped
    in a MappedRecord.
    """
    fields = accession_wrapper(line)
    return MappedRecord(fields)


# Wrapper pairing the field names Description, Id, IdType and Printable with
# the `pipes` splitter.
rell_wrapper = FieldWrapper(["Description", "Id", "IdType", "Printable"], pipes)