def _get_ref_domain_thread(account: MajesticCom, sub_domain: str, count_per_domain: int, fresh_data: bool, sub_domains: [], temp_sub_domains: [], categories: [], callback, tf=20, bad_country_list=[]): temp = [] print("doing backlinks of domain:", sub_domain, " domain len:", len(temp_sub_domains)) try: temp = account.get_ref_domains(sub_domain, max_count=count_per_domain, is_dev=False, fresh_data=fresh_data) # temp = account.get_backlinks(sub_domain, count_per_domain, topic="", is_dev=False, fresh_data=fresh_data) except Exception as ex: print(ex) for item in temp: if isinstance(item, MajesticRefDomainStruct): # item_catagory = str(CategoryManager.decode_sub_category(item.src_topic, False)) domain = item.domain item.ref_domain = domain if domain not in sub_domains and domain not in temp_sub_domains: if len(categories) > 0: is_in = False if len(item.src_topic) > 0: decoded = str(CategoryManager.decode_sub_category(item.src_topic, False)) for cate in categories: if cate in decoded: is_in = True break if is_in and item.tf >= tf and item.country not in bad_country_list: # add seed temp_sub_domains.append(domain) elif item.tf >= tf and item.country not in bad_country_list: temp_sub_domains.append(domain) item.ref_domain = domain if callback is not None: callback(item) time.sleep(1)
def get_sites_by_seed_sites(account: MajesticCom, seed_domains: [], catagories: [], fresh_data=False, index=0, iteration=1, loop_count=0, count_per_domain=100, callback=None, current_count=0, max_count=-1, tf=20) -> []: if iteration < 0: raise ValueError("get_sites_by_seed_sites: iteration should >= 0.") sub_domains = [LinkChecker.get_root_domain(x, use_www=False)[4] for x in seed_domains[index:]] if len(sub_domains) == 0: return [] backlinks = [] # counter = index if max_count > 0 and current_count >= max_count: return backlinks temp_sub_domains = [] temp = [] # target_catagories = [] # for catagory in catagories: # target_catagories.append(str(CategoryManager.decode_sub_category(catagory, False))) for sub_domain in sub_domains: print("doing backlinks of domain:", sub_domain, "seed len:", len(temp_sub_domains)) try: temp = account.get_backlinks(sub_domain, count_per_domain, topic="", is_dev=False, fresh_data=fresh_data) current_count += 1 except Exception as ex: print(ex) for item in temp: if isinstance(item, MajesticBacklinkDataStruct): # item_catagory = str(CategoryManager.decode_sub_category(item.src_topic, False)) domain = LinkChecker.get_root_domain(item.backlink, use_www=False)[4] item.ref_domain = domain # if callback is not None: # callback(item) # if len(target_catagories) > 0 and item_catagory not in target_catagories: # continue if domain not in sub_domains and domain not in temp_sub_domains: if len(catagories) > 0: is_in = False if len(item.src_topic) > 0: decoded = str(CategoryManager.decode_sub_category(item.src_topic, False)) for cate in catagories: if cate in decoded: is_in = True break if is_in and item.src_tf >= tf: temp_sub_domains.append(domain) elif item.src_tf >= tf: temp_sub_domains.append(domain) item.ref_domain = domain if callback is not None: callback(item) if max_count > 0 and current_count >= max_count: break if loop_count >= iteration: return backlinks else: return backlinks + GoogleMajestic.get_sites_by_seed_sites(account, sub_domains + temp_sub_domains, catagories, fresh_data, len(seed_domains), iteration, loop_count+1, count_per_domain, callback, current_count, max_count, tf)
def _get_back_link_thread(account: MajesticCom, sub_domain: str, count_per_domain: int, fresh_data: bool, sub_domains: [], temp_sub_domains: [], categories: [], callback, tf=20, bad_country_list=[]): temp = [] print("doing backlinks of domain:", sub_domain, " domain len:", len(temp_sub_domains)) try: temp = account.get_backlinks(sub_domain, count_per_domain, topic="", is_dev=False, fresh_data=fresh_data) except Exception as ex: print(ex) for item in temp: if isinstance(item, MajesticBacklinkDataStruct): # item_catagory = str(CategoryManager.decode_sub_category(item.src_topic, False)) domain = LinkChecker.get_root_domain(item.backlink, use_www=False)[4] item.ref_domain = domain # if callback is not None: # callback(item) # if len(target_catagories) > 0 and item_catagory not in target_catagories: # continue if domain not in sub_domains and domain not in temp_sub_domains: if len(categories) > 0: is_in = False if len(item.src_topic) > 0: decoded = str(CategoryManager.decode_sub_category(item.src_topic, False)) for cate in categories: if cate in decoded: is_in = True break if is_in and item.src_tf >= tf: temp_sub_domains.append(domain) elif item.src_tf >= tf: temp_sub_domains.append(domain) item.ref_domain = domain if callback is not None: callback(item) time.sleep(1)