def is_valid_bibtex(reference):
    """
    Check whether a reference string parses as BibTeX.

    Args:
        reference: A String holding the candidate BibTeX reference.

    Returns:
        True when the parser yields at least one entry, False otherwise.
    """
    # Non-ASCII characters are stripped up front and the result is handed to
    # the parser through a cStringIO stream, because pybtex seems to have an
    # issue with unicode input.
    stream = cStringIO.StringIO(remove_non_ascii(reference))
    entries = bibtex.Parser().parse_stream(stream).entries
    return len(entries) > 0
def is_valid_bibtex(reference):
    """
    Report whether the given text can be parsed as a BibTeX reference.

    Args:
        reference: A String reference expected to be in BibTeX format.

    Returns:
        Boolean: True if parsing produced one or more entries.
    """
    # pybtex seems to have an issue with unicode, so every non-ASCII
    # character is removed before the text is wrapped in a byte stream.
    ascii_stream = cStringIO.StringIO(remove_non_ascii(reference))
    bib_parser = bibtex.Parser()
    parsed = bib_parser.parse_stream(ascii_stream)
    entry_count = len(parsed.entries)
    return entry_count > 0
def _clean_cif(s):
    """
    Removes non-ASCII and some unsupported _cgraph fields from the cif string

    A line whose stripped text starts with "_cgraph" is dropped together with
    every line after it, up to (but not including) the next line whose
    stripped text starts with "_" or equals "loop_". Lines whose stripped
    text starts with "_eof" are dropped as well. Every line that is kept is
    passed through remove_non_ascii.

    Args:
        s: The cif contents as one newline-separated string.

    Returns:
        The kept lines, joined back together with newlines.
    """
    clean = []
    skip = False
    # Iterate directly instead of popping index 0 on every pass: popping the
    # front of a list shifts all remaining items, which made the original
    # loop quadratic in the number of lines.
    for line in s.split("\n"):
        stripped = line.strip()
        if skip:
            # Still inside a _cgraph field; only a new tag or a loop_ header
            # ends the skipped region.
            if not (stripped.startswith("_") or stripped == "loop_"):
                continue
            skip = False
        if stripped.startswith("_cgraph"):
            # Drop this line and start skipping its continuation lines.
            skip = True
        elif not stripped.startswith("_eof"):
            clean.append(remove_non_ascii(line))
    return "\n".join(clean)
def test_remove_non_ascii(self):
    """
    remove_non_ascii should keep only the ASCII part of a mixed string.

    Builds 10 random characters with codes 0-127 followed by 10 random
    characters with codes 128-150, and checks that the cleaned result has
    length 10.
    """
    # The ASCII half is drawn first so the random draws happen in the same
    # order as the concatenation.
    ascii_part = "".join(chr(random.randint(0, 127)) for _ in xrange(10))
    non_ascii_part = "".join(
        chr(random.randint(128, 150)) for _ in xrange(10)
    )
    mixed = ascii_part + non_ascii_part
    cleaned = remove_non_ascii(mixed)
    self.assertEqual(len(cleaned), 10)