예제 #1
0
    def test_full_LOC(self):
        """LineOrientedConstructor should cope with rich data.

        The same input is fed to a strict and a non-strict constructor: the
        strict one is expected to raise FieldError when called, the
        non-strict one is expected to build a populated record.
        """
        lines = [
            "abc\t def",
            " 3 \t n",
            "  abc   \txyz\n\n",
            "x\t5",
            "fgh   ",
            "x\t3    ",
        ]

        class rec(MappedRecord):
            Required = {"abc": []}

        handlers = {"abc": list_adder, "x": int_setter, "fgh": bool_setter}
        tab_splitter = DelimitedSplitter("\t")

        # Strict mode: calling the constructor must fail with FieldError.
        strict_loc = LineOrientedConstructor(lines, tab_splitter, handlers,
                                             rec, True)
        self.assertRaises(FieldError, strict_loc)

        # Non-strict mode: the record is built and has the expected content.
        lenient_loc = LineOrientedConstructor(lines, tab_splitter, handlers,
                                              rec, False)
        record = lenient_loc()
        assert isinstance(record, rec)
        expected = {
            "abc": ["def", "xyz"],
            "3": "n",
            "fgh": False,
            "x": 3,
        }
        self.assertEqual(record, expected)
예제 #2
0
 def test_splitter(self):
     """StrictFieldWrapper should split lines with the splitter it is given"""
     last_colon = DelimitedSplitter(":", -1)
     wrapper = StrictFieldWrapper(["label", "count"], last_colon)

     # Only the last colon separates label from count.
     parsed = wrapper("n:k:n:a:sd  ")
     self.assertEqual(parsed, {"label": "n:k:n:a", "count": "sd"})

     # A trailing delimiter yields an empty second field.
     parsed = wrapper("nknasd:")
     self.assertEqual(parsed, {"label": "nknasd", "count": ""})

     # An empty line cannot fill both fields.
     self.assertRaises(FieldError, wrapper, "")
예제 #3
0
    def test_parsers(self):
        """DelimitedSplitter should build splitters that behave as configured"""
        short_line = "  a  ; b   ;  c  d"
        long_line = " a ;  b  ; c;;d;e  ;"

        # No arguments, and an explicit None delimiter.
        default_split = DelimitedSplitter()
        self.assertEqual(default_split("a   b  c"), ["a", "b  c"])
        self.assertEqual(default_split("abc"), ["abc"])
        self.assertEqual(default_split("   "), [])

        whitespace_split = DelimitedSplitter(None)
        self.assertEqual(default_split("a  b  c"), whitespace_split("a  b  c"))

        # Explicit delimiter with default, fixed and unlimited split counts.
        first_semicolon = DelimitedSplitter(";")
        self.assertEqual(first_semicolon(short_line), ["a", "b   ;  c  d"])

        two_semicolons = DelimitedSplitter(";", 2)
        self.assertEqual(two_semicolons(short_line), ["a", "b", "c  d"])

        every_semicolon = DelimitedSplitter(";", None)
        self.assertEqual(every_semicolon(long_line),
                         ["a", "b", "c", "", "d", "e", ""])

        # Negative split counts.
        last_semicolon = DelimitedSplitter(";", -1)
        self.assertEqual(last_semicolon(long_line),
                         ["a ;  b  ; c;;d;e", ""])

        last_two_semicolons = DelimitedSplitter(";", -2)
        self.assertEqual(last_two_semicolons(long_line),
                         ["a ;  b  ; c;;d", "e", ""])
        self.assertEqual(last_two_semicolons(""), [])
        self.assertEqual(last_two_semicolons("x"), ["x"])
        self.assertEqual(last_two_semicolons("x;"), ["x", ""])
예제 #4
0
def DefaultDelimitedSplitter(delimiter):
    """Returns a splitter that pads single-field results with an empty string.

    The returned function runs a DelimitedSplitter built with ``delimiter``
    on the line; when exactly one field comes back, "" is appended so that
    the result has two items. Any other result is returned unchanged.
    """
    split = DelimitedSplitter(delimiter=delimiter)

    def f(line):
        fields = split(line)
        if len(fields) != 1:
            return fields
        # Only one field was found: add an empty second field.
        fields.append("")
        return fields

    return f
예제 #5
0
파일: locuslink.py 프로젝트: jbw900/cogent3
# Module metadata.
__email__ = "*****@*****.**"
__status__ = "Development"

# Module-level aliases for the unbound str methods, so they can be passed
# around or called as plain functions (e.g. strip(s)).
maketrans = str.maketrans
strip = str.strip
rstrip = str.rstrip


def ll_start(line):
    """Tells whether a line opens a LocusLink record.

    A record start is any line whose first two characters are '>>'.
    """
    opens_record = line.startswith(">>")
    return opens_record


# Record finder built from the ll_start predicate.
# NOTE(review): presumably groups lines into records that each begin with a
# line for which ll_start is true -- confirm against LabeledRecordFinder.
LLFinder = LabeledRecordFinder(ll_start)

# Reusable field splitters for the delimiters used in this format.
# NOTE(review): presumably the second argument is the number of splits
# (omitted = split on the first delimiter only, None = split on every
# delimiter) -- confirm against DelimitedSplitter.
pipes = DelimitedSplitter("|", None)
first_pipe = DelimitedSplitter("|")
commas = DelimitedSplitter(",", None)
first_colon = DelimitedSplitter(":", 1)

# Wrapper for accession lines, using the pipe splitter.
# NOTE(review): presumably maps the split pieces onto the listed field
# names in order -- confirm against FieldWrapper.
accession_wrapper = FieldWrapper(["Accession", "Gi", "Strain"], pipes)


def _read_accession(line):
    """Parses an accession line into a MappedRecord.

    Expected line format: Accession | Gi | Strain.
    """
    fields = accession_wrapper(line)
    return MappedRecord(fields)


# Wrapper for lines with Description, Id, IdType and Printable fields, using
# the pipe splitter.
# NOTE(review): presumably maps the split pieces onto these names in order --
# confirm against FieldWrapper.
rell_wrapper = FieldWrapper(["Description", "Id", "IdType", "Printable"], pipes)

예제 #6
0
파일: clustal.py 프로젝트: jbw900/cogent3
            result[key] = [val]
            labels.append(key)
    return result, labels


def is_clustal_seq_line(line):
    """Returns True if line starts with a non-blank character and no header.

    A line qualifies when it is non-empty, its first character is not
    whitespace, and it does not start with 'CLUSTAL' or 'MUSCLE'.

    Always returns a real bool: an empty (or None) line gives False rather
    than the falsy input itself, so the result is safe to compare or store.

    Useful for filtering other lines out of the file.
    """
    return bool(
        line
        and not line[0].isspace()
        # str.startswith accepts a tuple of prefixes: one call, both headers.
        and not line.startswith(("CLUSTAL", "MUSCLE"))
    )


# Splitter built with a None delimiter and a split count of -1.
# NOTE(review): presumably splits a line at its last run of whitespace --
# confirm against DelimitedSplitter.
last_space = DelimitedSplitter(None, -1)


def delete_trailing_number(line):
    """Deletes trailing number from a line.

    The line is split on whitespace; if the last piece parses as an int,
    the remaining pieces are returned joined by single spaces.

    WARNING: does not preserve internal whitespace when a number is removed!
    (converts each whitespace run to a single space). Returns the original
    line if it didn't end in a number, including when the line is empty or
    contains only whitespace.
    """
    pieces = line.split()
    if not pieces:  # empty or whitespace-only line: nothing to inspect
        return line
    try:
        int(pieces[-1])
    except ValueError:  # no trailing number
        return line
    return " ".join(pieces[:-1])