def __init__(self):
    """Create the helper objects and apply the configured depth.

    Instantiates the URL corrector, the HTML helper and the parser, then
    reads ``getConfig()`` and passes its ``"depth"`` entry to
    ``self.depth_setter``.
    """
    # NOTE: the original source keeps the base-class initializer call,
    # super(linkSearchAlgos, self).__init__(), commented out; it remains
    # disabled here.
    self.urlCorrect = urlCorrector()
    self.HTML_help = HTML_corrector_help()
    self.parser = parsers()

    settings = getConfig()
    configured_depth = settings["depth"]
    self.depth_setter(configured_depth)
def _install_parsers():
    """Attach a populator and a property for every registered parser.

    For each ``(name, parser)`` pair produced by ``parsers.parsers()`` the
    name is converted from camelCase to lower-case snake_case. A
    ``populate_<name>`` attribute is then set on ``IMDb`` and a ``<name>``
    property is set on ``IMDbTitle``.
    """
    # An upper-case letter directly preceded by a lower-case letter marks
    # a word boundary inside a camelCase name.
    camel_boundary = re.compile(r'(?<=[a-z])([A-Z])')

    for raw_name, parser in parsers.parsers():
        attr_name = camel_boundary.sub(r'_\1', raw_name).lower()

        # IMDb gets the populate_<name> entry point first ...
        populate_func = imdb_populator(parser, attr_name,
                                       default=parser.default)
        setattr(IMDb, 'populate_' + attr_name, populate_func)

        # ... then IMDbTitle gets the matching property.
        accessors = imdbtitle_property(attr_name)
        doc_text = "IMDb " + attr_name + " autogenerated property."
        setattr(IMDbTitle, attr_name, property(*accessors, doc=doc_text))
def __init__(self):
    """Initialise link storage, URL schemes and string-search settings.

    The search type and proximity are read from the ``"string_search"``
    section of ``getConfig()``; the proximity is applied through
    ``self.setProximity``.
    """
    http_scheme, https_scheme = "http", "https"

    self.list_of_links = []
    self.SCHEME_HTTP = http_scheme
    self.SCHEME_HTTPS = https_scheme
    self.SUPPORTED_SCHEMES = (http_scheme, https_scheme)
    self.parser = parsers()
    self.search_choices = {
        1: 'exact',
        2: 'similar',
    }

    search_settings = getConfig()["string_search"]
    self.search_choice = search_settings["type"]
    self.setProximity(search_settings["proximity"])
def __init__(self, parserslist=None, files=None, saveConfig=False):
    """Record the parser table and config file list, then reload.

    Args:
        parserslist: Optional entries merged, via ``update()``, into the
            object returned by ``parsers.parsers()``. ``None`` (the
            default) merges nothing.
        files: Optional list of file names; ``['config.ini']`` is used
            when omitted. Each name is passed through ``appdir``.
        saveConfig: Stored unchanged as ``self.saveConfig``.
    """
    # A None sentinel replaces the former mutable ``{}`` default, so no
    # dict object is shared between calls and an explicit None is accepted.
    parser_table = parsers.parsers()
    if parserslist is not None:
        parser_table.update(parserslist)

    if files is None:
        files = ['config.ini']

    self.saveConfig = saveConfig
    self.parser_list = parser_table
    self.filename_list = [appdir(name) for name in files]
    self.configini = None
    self.errors = False
    self.fatalErrors = False
    self.do_reload()
def __init__(self, parserslist=None, files=None, saveConfig=False):
    """Record the parser table and config file list, then reload.

    Args:
        parserslist: Optional entries merged, via ``update()``, into the
            object returned by ``parsers.parsers()``. ``None`` (the
            default) merges nothing.
        files: Optional list of file names; ``['config.ini']`` is used
            when omitted. Each name is passed through ``appdir``.
        saveConfig: Stored unchanged as ``self.saveConfig``.
    """
    # Use a None sentinel rather than a mutable ``{}`` default: the
    # default object would otherwise be shared by every call.
    merged_parsers = parsers.parsers()
    if parserslist is not None:
        merged_parsers.update(parserslist)

    if files is None:
        files = ['config.ini']

    self.saveConfig = saveConfig
    self.parser_list = merged_parsers
    self.filename_list = [appdir(filename) for filename in files]
    self.configini = None
    self.errors = False
    self.fatalErrors = False
    self.do_reload()
def _install_parsers():
    """Install support for each parser into IMDb and IMDbTitle.

    Every ``(name, parser)`` pair from ``parsers.parsers()`` yields a
    ``populate_<snake_name>`` attribute on ``IMDb`` and a ``<snake_name>``
    property on ``IMDbTitle``.
    """
    boundary = re.compile(r'(?<=[a-z])([A-Z])')

    def snake_case(camel):
        # Insert "_" before each upper-case letter that follows a
        # lower-case one, then lower-case the whole name.
        return boundary.sub(r'_\1', camel).lower()

    for parser_name, parser in parsers.parsers():
        key = snake_case(parser_name)

        setattr(
            IMDb,
            'populate_' + key,
            imdb_populator(parser, key, default=parser.default),
        )

        generated = property(
            *imdbtitle_property(key),
            doc="IMDb " + key + " autogenerated property."
        )
        setattr(IMDbTitle, key, generated)
def rebuild_index(self, dbdir):
    """Convert and index data files for random access.

    Index movie list for searching.

    Args:
        dbdir: Passed to every parser constructor and to
            ``search.create_index``.

    Raises:
        Exception: If ``self.dbfile`` already exists.
    """
    # Refuse to run when the target database file is already present.
    if os.path.exists(self.dbfile):
        raise Exception('%s exists' % self.dbfile)

    # Import and index data files.
    # The per-parser "Indexing ..." debug message is commented out in the
    # original source, so the parser name is unused here.
    for _parsername, parser in parsers.parsers():
        obj = parser(dbfile=self.dbfile, dbdir=dbdir, debug=self.debug)
        with Timer(indent=2, quiet=not self.debug):
            obj.rebuild_index(do_copy=True)

    # Create index of movie titles.
    if self.debug:
        # Single-argument parenthesised print is valid on Python 2 and 3.
        print("1 Creating search index...")
    with Timer(indent=2, quiet=not self.debug):
        search.create_index(self.dbfile, dbdir, debug=self.debug)
def rebuild_index(self, dbdir):
    """Convert and index data files for random access.

    Index movie list for searching.

    Args:
        dbdir: Passed to every parser constructor and to
            ``search.create_index``.

    Raises:
        Exception: If ``self.dbfile`` already exists.
    """
    # Refuse to run when the target database file is already present.
    if os.path.exists(self.dbfile):
        raise Exception('%s exists' % self.dbfile)

    # Import and index data files.
    for parsername, parser in parsers.parsers():
        obj = parser(dbfile=self.dbfile, dbdir=dbdir, debug=self.debug)
        if self.debug:
            # Single-argument parenthesised print is valid on Python 2
            # and Python 3 alike.
            print("Indexing %s..." % parsername)
        with Timer(indent=2, quiet=not self.debug):
            obj.rebuild_index(do_copy=True)

    # Create index of movie titles.
    if self.debug:
        print("Creating search index...")
    with Timer(indent=2, quiet=not self.debug):
        search.create_index(self.dbfile, dbdir, debug=self.debug)
def __init__(self):
    """Build the helper objects and select the default mode.

    Creates the URL corrector, HTML helper, downloader, error handler and
    parser objects, sets ``self.choice`` to the ``'4'`` entry of
    ``self.choices`` and mirrors several attributes of ``self.urlCorrect``
    onto this instance.
    """
    self.urlCorrect = urlCorrector()
    self.HTML_corrector_helper = HTML_corrector_help()
    self.download = download()
    self.errors = errors()
    self.parsers = parsers()

    self.choices = {
        '1': 'download',
        '2': 'error',
        '3': 'search',
        '4': 'crawl',
        '5': 'web structure',
    }
    # The default is crawl.
    self.choice = self.choices['4']

    # Expose the URL corrector's link list and scheme constants directly
    # on this object, in the same order as before.
    mirrored = (
        'list_of_links',
        'NOT_LINK',
        'SCHEME_HTTP',
        'SCHEME_HTTPS',
        'SUPPORTED_SCHEMES',
    )
    for attr in mirrored:
        setattr(self, attr, getattr(self.urlCorrect, attr))