def get_profiles(self):
    """Return the unique sitemap entries that end with ``-profile/``.

    Entries come from ``get_sitemap_locs(SITEMAP_URL)``. Duplicates are
    removed by passing through a set, so the order of the returned list
    is unspecified.
    """
    profile_urls = {
        url
        for url in get_sitemap_locs(SITEMAP_URL)
        if '-profile' in url and url.endswith('-profile/')
    }
    return list(profile_urls)
def get_profiles(self):
    """Return the unique TrustRadius product review URLs from the sitemap.

    An entry of ``get_sitemap_locs(SITEMAP_URL)`` is kept when it starts
    with the products prefix and ends with ``/reviews``. Duplicates are
    removed through a set, so the order of the returned list is
    unspecified.
    """
    products_prefix = 'https://www.trustradius.com/products/'
    review_urls = {
        url
        for url in get_sitemap_locs(SITEMAP_URL)
        if url.startswith(products_prefix) and url.endswith('/reviews')
    }
    return list(review_urls)
def get_profiles(self):
    """Return the unique Serchen company URLs from the sitemap.

    An entry of ``get_sitemap_locs(SITEMAP_URL)`` is kept when it starts
    with the company prefix and does not end with ``similar-companies/``.
    Duplicates are removed through a set, so the order of the returned
    list is unspecified.
    """
    company_prefix = 'https://www.serchen.com/company/'
    company_urls = set()
    for url in get_sitemap_locs(SITEMAP_URL):
        if not url.startswith(company_prefix):
            continue
        if url.endswith('similar-companies/'):
            continue
        company_urls.add(url)
    return list(company_urls)
def get_profiles(self):
    """Return the unique SaaSGenius program URLs from the sitemap.

    An entry of ``get_sitemap_locs(SITEMAP_URL)`` is kept when it starts
    with the program prefix and ends with neither ``/alternatives`` nor
    ``/comparisons``. Duplicates are removed through a set, so the order
    of the returned list is unspecified.
    """
    program_prefix = 'http://www.saasgenius.com/program/'
    # A URL can end with at most one of these, so the exclusion must be
    # "ends with neither". Joining the two negated checks with ``or`` is
    # always true and would let every program URL through.
    excluded_suffixes = ('/alternatives', '/comparisons')
    program_urls = {
        url
        for url in get_sitemap_locs(SITEMAP_URL)
        if url.startswith(program_prefix)
        and not url.endswith(excluded_suffixes)
    }
    return list(program_urls)
def get_profiles(self):
    """Return the unique Built In Colorado company URLs from all sitemaps.

    Each sitemap in ``SITEMAPS`` is logged and then read with
    ``get_sitemap_locs``. Entries starting with the company prefix are
    kept. Duplicates are removed through a set, so the order of the
    returned list is unspecified.
    """
    company_prefix = 'https://www.builtincolorado.com/company/'
    company_urls = set()
    for sitemap_url in SITEMAPS:
        logger.info(sitemap_url)
        company_urls.update(
            url
            for url in get_sitemap_locs(sitemap_url)
            if url.startswith(company_prefix)
        )
    return list(company_urls)
def get_profiles(self):
    """Return unique ``.../details`` URLs derived from ``/reviews`` entries.

    Every sitemap in ``SITEMAP_URLS`` is read with ``get_sitemap_locs``.
    For each entry ending in ``/reviews`` the last path segment is
    replaced with ``details``. The result keeps first-seen order and
    contains no duplicates.
    """
    seen = set()  # O(1) membership test instead of scanning the result list
    final_locs = []
    for sitemap_url in SITEMAP_URLS:
        for url in get_sitemap_locs(sitemap_url):
            if not url.endswith('/reviews'):
                continue
            # Keep everything up to and including the last '/', then
            # append the new final segment.
            details_url = url[:url.rindex('/') + 1] + 'details'
            if details_url not in seen:
                seen.add(details_url)
                final_locs.append(details_url)
    return final_locs
def get_profiles(self):
    """Log the sitemap URL and return its entries unfiltered.

    The return value is whatever ``get_sitemap_locs(SITEMAP_URL)`` yields;
    no filtering or de-duplication is applied here.
    """
    logger.info(f'Getting profiles - {SITEMAP_URL}')
    return get_sitemap_locs(SITEMAP_URL)