def __init__(self, starting_url=False, save=False, initial_seed=False):
    """Set up a crawler that starts from an artist category listing page.

    Args:
        starting_url: Page to start from. Any falsy value (the default
            ``False``) selects ``self.base_url + "/w/Category:Artist"``.
        save: Stored unchanged on ``self.save``.
        initial_seed: Stored unchanged on ``self.initial_seed``.
    """
    # NOTE(review): self.base_url is not assigned in this method; it is
    # presumably provided by Crawler -- confirm.
    Crawler.__init__(self)

    # No database values, just these attributes.
    self.url = starting_url or self.base_url + "/w/Category:Artist"
    self.save = save
    self.initial_seed = initial_seed

    # Disabled alternative kept from the original source:
    # self.url = self.tree.xpath("//div[@class='listPagination'][1]/a[contains(text(), 'next')]/@href")[0]
def __init__(self, url, save=False, initial_seed=False):
    """Set up a crawler for a single page identified by ``url``.

    Args:
        url: Address of the page this instance works on.
        save: If falsy, results are printed for debugging and testing
            instead of being saved.
        initial_seed: Stored unchanged on ``self.initial_seed``.
    """
    Crawler.__init__(self)

    # Database values. row_id starts as False and name as an empty string;
    # nothing in this method fills them in.
    self.row_id = False
    self.name = ''
    self.url = url

    # Other variables.
    self.save = save  # If not saving, print for debugging and testing purposes
    self.initial_seed = initial_seed
def __init__(self, dj_id, dj_name, url, save=False, initial_seed=False):
    """Set up a crawler for one page belonging to a given DJ.

    Args:
        dj_id: Identifier of the DJ, stored on ``self.dj_id``.
        dj_name: Display name of the DJ. The part before the first "("
            (whitespace-stripped) is also kept as
            ``self.searchable_dj_name``.
        url: Address of the page; passed to ``self.get_tree``.
        save: If falsy, results are printed for debugging and testing
            instead of being saved.
        initial_seed: Stored unchanged on ``self.initial_seed``.
    """
    Crawler.__init__(self)

    # Database values, initialised to their starting defaults.
    self.row_id = False
    self.dj_id = dj_id
    self.url = url
    self.track_ids = []
    self.multi_dj = False
    self.multi_version = True
    self.page_mod_time = False

    # Other attributes, including xpath components.
    self.dj_name = dj_name
    # Drop any parenthesised suffix, e.g. "Name (UK)" -> "Name".
    self.searchable_dj_name = self.dj_name.split("(")[0].strip()
    # XPath predicate that excludes nodes whose class contains
    # 'commenttextfield'.
    self.no_comments_selector = "not(contains(@class,'commenttextfield'))"
    # NOTE(review): get_tree is defined elsewhere; it presumably fetches
    # and parses the page, which would mean construction does I/O -- confirm.
    self.tree = self.get_tree(url)
    self.track_texts = []
    self.save = save  # If not saving, print for debugging and testing purposes
    self.initial_seed = initial_seed