class TwitterBot:
    """Logs into twitter.com and (via methods outside this chunk) scrapes
    favorites/following pages.

    The class-level regex strings and prefix/suffix pairs are used to pull
    profile URLs and display names out of Twitter's HTML markup.
    """

    # Patterns for locating favorite / following entries in the page HTML
    # (raw strings so the \- escapes read as intended).
    __favorites_regex = r'<a href="http://twitter.com/[a-zA-Z0-9_]+" title="[a-zA-Z0-9\-_ .]+">[a-zA-Z0-9_]+</a>'
    __following_regex = r'<a href="http://twitter.com/[a-zA-Z0-9_]+" rel="contact"><img alt="[a-zA-Z0-9\-_ .]+" class'
    # Prefix/suffix pairs used to strip the interesting value out of a match.
    __fav_complete_name_regex = r'title="[a-zA-Z0-9\-_ .]+">'
    __fav_complete_name_prefix = 'title="'
    __fav_complete_name_sufix = '">'
    __foll_complete_name_regex = r'img alt="[a-zA-Z0-9\-_ .]+" class'
    __foll_complete_name_prefix = 'img alt="'
    __foll_complete_name_sufix = '" class'
    __url_regex = r'href="http://twitter.com/[a-zA-Z0-9_]+" '
    __url_prefix = 'href="'
    __url_sufix = '" '

    def __init__(self):
        # Real setup happens in initialize() so instances are cheap to create.
        pass

    def initialize(self, proxies_per_proto=None, user=None, passw=None, debug=False):
        """Build the browser/helper objects and sign in to twitter.com.

        proxies_per_proto -- dict mapping protocol -> proxy (default: none)
        user, passw       -- Twitter credentials for the login form
        debug             -- when True, dump HTTP traffic and tracebacks
        """
        # Fresh dict per call: avoids the shared-mutable-default pitfall.
        if proxies_per_proto is None:
            proxies_per_proto = {}
        print('INIT: TwitterBot')
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        self.__debug = debug
        self.__sandman = SandMan('TwitterBot')
        self.__ngd = NGD()
        self.__ngd.set_proxies(proxies_per_proto)
        self.__lock = Lock()
        try:
            # Sign in: the login form is the second form on the page (nr=1).
            self.__br.open("http://twitter.com/")
            self.__br.select_form(nr=1)
            self.__br['session[username_or_email]'] = user
            self.__br['session[password]'] = passw
            self.__br.submit()  # response object was never used
            time.sleep(0.2)     # brief pause right after login
        except Exception as e:  # 2.6+/3-compatible form of "except Exception, e"
            if self.__debug:
                traceback.print_exc(file=sys.stdout)
            print(str(e))
            # Fixed garbled message ("don' work" -> "doesn't work").
            print("EXCEPTION on TwitterBot, possibly bad user/password or https login doesn't work behind a proxy.")
class SearchEngineBot:
    """Harvests e-mail addresses through search engines (no sign-in needed)."""

    def __init__(self):
        # Real setup happens in initialize() so instances are cheap to create.
        pass

    def initialize(self, proxies_per_proto=None, user=None, passw=None, debug=False):
        """Create the browser and helper objects.

        user/passw are accepted for interface parity with the other bots but
        are unused: search engines need no sign-in.
        """
        # Fresh dict per call: avoids the shared-mutable-default pitfall.
        if proxies_per_proto is None:
            proxies_per_proto = {}
        print('INIT: SearchEngineBot')
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        self.__ngd = NGD(proxies_per_proto)
        self.__harvest_command = EmailHarvestingCommand()
        self.__harvest_command.set_only_complete_names(False)
        self.__sandman = SandMan('SearchEngineBot')
        # no sign in

    def set_proxies_per_proto(self, proxies):
        """Record `proxies` and propagate them to the NGD helper.

        Also caches self.__proxy as a ('host', port) tuple parsed from the
        'http' entry, or None when no proxies are configured.
        """
        self.__proxies = proxies
        try:
            self.__ngd.set_proxies(proxies)
        except Exception:  # was a bare except:; keep best-effort but don't swallow KeyboardInterrupt/SystemExit
            # Fixed typos ("SeachEngineBot", "don' work").
            print("EXCEPTION on SearchEngineBot, possibly bad user/password or https login doesn't work behind a proxy.")
        if len(proxies) == 0:
            proxy = None
        else:
            # 'host:port' -> ('host', port); assumes no scheme prefix in the value
            parts = proxies['http'].split(':')
            proxy = (parts[0], int(parts[1]))
        self.__proxy = proxy

    def set_sleep_secs(self, secs):
        """Seconds to sleep between requests (delegated to SandMan)."""
        self.__sandman.set_sleep_secs(secs)

    def set_sleep_module(self, iterations):
        """Sleep every `iterations` requests (delegated to SandMan)."""
        self.__sandman.set_sleep_module(iterations)

    def set_sleep_failure(self, secs):
        """Seconds to sleep after a failure (delegated to SandMan)."""
        self.__sandman.set_sleep_failure(secs)

    def set_sleep_random_flag(self, flag):
        """Enable/disable randomized sleeps (param renamed from `bool`,
        which shadowed the builtin; positional callers are unaffected)."""
        self.__sandman.set_sleep_random_flag(flag)

    def self_email(self, email, name):
        """Return True when `name` plausibly owns `email`.

        Matches the e-mail local part against the name itself and, for
        "First Last" names, against first.last, first_last and flast.
        """
        local = email.split('@')[0].lower()
        # Covers exact single-word matches, "first"-only matches and any
        # name beginning with the local part.
        if name.lower().startswith(local):
            return True
        parts = name.split(' ')
        if len(parts) == 2:
            first, last = parts[0].lower(), parts[1].lower()
            if local in (first + '.' + last, first + '_' + last):
                return True
            # e.g. "John Smith" -> jsmith (guard against an empty first word)
            if first and first[0] + last == local:
                return True
        return False

    def name_to_emails(self, aliases_graph):
        """Resolve an (aliases, graph) pair to all candidate e-mails.

        Accepts a single (aliases, graph) tuple — the replacement for the
        old py2-only tuple-parameter form, so callers are unchanged.
        `graph` is currently unused, matching the original behaviour.
        """
        aliases, graph = aliases_graph
        self.__harvest_command.set_only_complete_names(False)
        return self.__name_to_emails(aliases, 'all_mails')