def main():
    """Convert a bzip2-compressed source file into a StringDBDict store.

    Expects three command-line arguments, in this order:

        1. data_store_name   -- name handed to StringDBDict for the output
        2. original_filename -- bzip2-compressed input, wrapped in Text
        3. mrsty_file        -- bzip2-compressed input, wrapped in MRSTYTable
                                and used to build the SemanticTypes

    Arguments beyond the third are ignored. When fewer than three are given
    the function exits through SystemExit with a usage message instead of
    failing with a bare IndexError. Both compressed files are closed
    explicitly once the conversion finishes or raises.
    """
    if len(sys.argv) < 4:
        sys.exit("Usage: %s <data_store_name> <original_file.bz2> "
                 "<mrsty_file.bz2>" % sys.argv[0])
    data_store_name, original_filename, mrsty_file = sys.argv[1:4]

    original_bz2 = bz2.BZ2File(original_filename, 'r')
    mrsty_bz2 = None
    try:
        original_file = Text(original_bz2)

        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as a Python 3 function call.
        print("Loading semantic types from %s" % mrsty_file)
        stypes = SemanticTypes()
        mrsty_bz2 = bz2.BZ2File(mrsty_file)
        stypes.build_from_mrsty_file(MRSTYTable(mrsty_bz2))
        print("Semantic types loaded.")

        print("Turning the data from %s into %s. Please wait." % (
            original_filename, data_store_name))
        data_store = StringDBDict(data_store_name,
                                  sync_every_transactions=0,
                                  write_out_every_transactions=200000,
                                  file_mode='c')
        # NOTE(review): sync_every is forced to 0 for the duration of the
        # build and set to 100 afterwards; presumably this suspends periodic
        # syncing during the bulk load -- confirm against StringDBDict.
        data_store.sync_every = 0
        build_concept_dictionary(original_file, data_store, stypes)
        data_store.sync_every = 100
        print("Conversion done.")
    finally:
        # Release the compressed inputs deterministically rather than
        # relying on garbage collection.
        if mrsty_bz2 is not None:
            mrsty_bz2.close()
        original_bz2.close()
def __iter__(self):
    """Yield one parsed line object for every usable line of the file.

    Each line produced by Text.__iter__ is lowercased and passed to
    is_ignorable; lines it flags are skipped. Every other line is handed
    to the line type given at construction time, and the resulting object
    is yielded.

    Only ParsingError is intercepted. When the line type raises it, the
    exception and the offending line are passed to ignore_exception: a
    true result silently drops the line and iteration continues; otherwise
    the problem is logged and the same exception is re-raised. Any other
    exception type propagates untouched.
    """
    for line in Text.__iter__(self):
        if self.is_ignorable(line.lower()):
            continue
        try:
            new_line = self.__line_type(line)
        # "as" binding is accepted by Python 2.6+ and Python 3 alike; the
        # older comma form is a syntax error on Python 3.
        except ParsingError as which_exception:
            if not self.ignore_exception(which_exception, line):
                logging.error("Unignorable exception on line '%s'", line)
                raise
            # ignore_exception accepted the failure: skip this line.
        else:
            yield new_line
def __iter__(self):
    """Iterate over the file, producing a line object per accepted line.

    Lines come from Text.__iter__. A line whose lowercased form is
    reported ignorable by is_ignorable is skipped. All remaining lines are
    converted by calling the line type supplied to the constructor, and
    each converted object is yielded.

    A ParsingError raised during conversion is offered, together with the
    raw line, to ignore_exception. If that returns a true value the line
    is dropped without being reported; if not, an error is logged and the
    exception is re-raised. No other exception type is caught here.
    """
    for line in Text.__iter__(self):
        if self.is_ignorable(line.lower()):
            continue
        try:
            parsed = self.__line_type(line)
        # The "as" form works on Python 2.6+ and on Python 3, where the
        # comma form no longer parses.
        except ParsingError as which_exception:
            if self.ignore_exception(which_exception, line):
                # Known, tolerated parsing problem: move on to the next line.
                continue
            logging.error("Unignorable exception on line '%s'", line)
            raise
        yield parsed
def __repr__(self):
    """Return "<ClassName file based on ...>" for this object.

    The placeholder is filled with the repr() of the string that
    Text.__repr__ returns for this instance, so that string appears quoted.
    """
    class_name = self.__class__.__name__
    underlying_description = Text.__repr__(self)
    return "<%s file based on %r>" % (class_name, underlying_description)
def __init__(self, fileobject, type_of_lines, lines_to_ignore):
    """Set up the file wrapper.

    fileobject      -- forwarded unchanged to Text.__init__.
    type_of_lines   -- callable stored for later use; __iter__ calls it
                       with each raw line to build the yielded object.
    lines_to_ignore -- iterable of strings; each one is stored lowercased
                       and stripped of surrounding whitespace.
    """
    Text.__init__(self, fileobject)
    self.__line_type = type_of_lines
    # __iter__ lowercases every line before asking is_ignorable about it,
    # so the snippets are kept in the same normalised form (lowercase,
    # no surrounding whitespace).
    self.__lines_to_ignore = [snippet.lower().strip()
                              for snippet in lines_to_ignore]
def __init__(self, fileobject, type_of_lines, lines_to_ignore):
    """Initialise the underlying Text and remember how lines are handled.

    fileobject is passed straight to Text.__init__. type_of_lines is kept
    as the callable that __iter__ applies to each raw line. lines_to_ignore
    is an iterable of strings, stored after lowercasing and stripping
    each entry.
    """
    Text.__init__(self, fileobject)
    self.__line_type = type_of_lines
    # Lines are lowercased by __iter__ before the is_ignorable check, so
    # the ignorable snippets must be lowercased and stripped as well.
    normalised_snippets = [entry.lower().strip() for entry in lines_to_ignore]
    self.__lines_to_ignore = normalised_snippets