Example 1
 def _domain_search(self, domain, api_key="", name=""):
     qry = 'site:zoominfo.com/c/ {0}'.format(domain)
     df = Google().search(qry)
     if df.empty: 
         data = {'company_name': name, "domain":domain}
         return CompanyInfoCrawl()._persist(data,"zoominfo",api_key)
     df['_name'] = [i.split("Company Profile")[0].strip() 
                    for i in df.link_text]
     df["score"] = [fuzz.ratio(b, name) for b in df._name]
     df = df[df.score > 70]
     df = df.sort('score',ascending=False)
     if df.empty: 
       data = {'company_name': name, "domain":domain}
       return CompanyInfoCrawl()._persist(data,"zoominfo",api_key)
     df = df.reset_index().drop('index',1)
     url = df.ix[0].link
     print "ZOOMINFO URL", url
     html = requests.get(url).text
     html = self._remove_non_ascii(html)
     zoominfo = self._cache_html_to_df(html)
     zoominfo['company_name'] = name
     zoominfo['handle'] = url
     zoominfo["domain_search"] = True
     zoominfo["domain"] = domain
     print zoominfo
     CompanyInfoCrawl()._persist(zoominfo, "zoominfo", api_key)
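
A note on the filter above: fuzzywuzzy's `ratio` scores two strings 0-100, and hits scoring 70 or below are discarded before the top match is taken. A minimal, self-contained sketch of that step (sample names are illustrative; `sort_values` is the modern spelling of the deprecated `sort` used above):

import pandas as pd
from fuzzywuzzy import fuzz

name = "Acme Corp"
df = pd.DataFrame({"_name": ["Acme Corp", "Acme Corporation", "Ace Hardware"]})

# score each candidate company name against the target (0-100)
df["score"] = [fuzz.ratio(candidate, name) for candidate in df._name]

# keep strong matches only, best first
df = df[df.score > 70].sort_values("score", ascending=False)
print(df)
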
Example 2
    def _press_releases(self, domain, api_key="", company_name=""):
        ''' Press releases: PRNewswire, BusinessWire, Marketwired, Newswire.ca, Reuters '''
        pw = Google().search('"{0}" site:prnewswire.com'.format(company_name))
        bw = Google().search(
            '"{0}" site:businesswire.com'.format(company_name))
        mw = Google().search('"{0}" site:marketwired.com'.format(company_name))
        nw = Google().search('"{0}" site:newswire.ca'.format(company_name))
        rt = Google().search('"{0}" site:reuters.com'.format(company_name))

        p = pd.concat([pw, bw, mw, nw, rt])
        p = p.drop_duplicates()
        p['date'] = [span.split('...')[0].strip() for span in p.link_span]
        p['description'] = [
            "".join(span.split('...')[1:]).strip() for span in p.link_span
        ]
        p["timestamp"] = [Helper()._str_to_timestamp(i) for i in p.date]
        p['title'] = p['link_text']

        p = p.drop('link_text', 1)
        p = p.drop('url', 1)
        p = p.drop('link_span', 1)
        #for i in p.timestamp: print i

        press = {'data': p.to_dict('records'), 'company_name': company_name}
        press["domain"] = domain
Example 3
    def _recent_webpages_published(self, domain, period=None):
        if period:
            df = Google().search("site:{0}".format(domain), 1, "d")
            #df2 = Google().search("{0}".format(name), 1, "d")
        else:
            df = Google().search("site:{0}".format(domain))
            #df2 = Google().search("{0}".format(name))

        # TODO - add timestamps
        # TODO - queue scrapes
        #df = pd.concat([df, df2])
        data = df
        if data.empty: return "NO RECENT WEBPAGES"
        data["domain"] = domain
        data["event_type"] = "RecentWebpageEvent"
        print data
        data = data.applymap(lambda x: self._remove_non_ascii(x))
        print data
        data["event_key"] = [
            "".join(map(str,
                        _data.to_dict().values()))[:124]
            for i, _data in data.iterrows()
        ]
        data = [row.dropna().to_dict() for i, row in data.iterrows()]
        r.table("events").insert(data).run(conn)
Example 4
 def init(self):
     self.colorlib = self.madcow.colorlib
     try:
         self.learn = Learn(madcow=madcow)
     except:
         self.learn = None
     self.google = Google()
Example 5
 def _reviews(self, domain, api_key="", name=""):
     df = Google().search('site:glassdoor.com/reviews {0}'.format(name))
     if df.empty: return
     url = df.ix[0].link
     r = BeautifulSoup(Google().cache(url))
     rating = r.find('div', {'class': 'ratingNum'})
     rating = rating.text if rating else ""
     # TODO - awards
     reviews = pd.DataFrame()
     for review in r.find_all('li', {'class': 'empReview'}):
         pros = review.find('p', {'class': 'pros'})
         cons = review.find('p', {'class': 'cons'})
         extra = review.find('p', {'class': 'notranslate'})
         summary = review.find('span', {'class': 'summary'})
         date = review.find('time', {'class': 'date'})
         vals = [pros, cons, extra, summary, date]
         cols = ["pros", "cons", "extra", "summary", "date"]
          vals = [val.text.strip() if val else "" for val in vals]
         data = dict(zip(cols, vals))
         data["timestamp"] = Helper()._str_to_timestamp(data["date"])
         reviews = reviews.append(data, ignore_index=True)
     data = {'data': reviews.to_dict('r'), 'company_name': name}
     data['api_key'] = api_key
     data['domain'] = domain
     CompanyExtraInfoCrawl()._persist(data, "glassdoor_reviews", api_key)
Example 6
 def _daily_news(self, domain, api_key="", name=""):
     df = Google().search('site:twitter.com {0}'.format(domain))
     link = df.link.tolist()[0]
     html = Google().cache(link)
     tweets = self._tweets(html, api_key)
     data = {"data": tweets, "company_name": name, "domain": domain}
     CompanyExtraInfoCrawl()._persist(data, "tweets")
Example 7
    def _old_parse_article_html(self, objectId, title, industry_press=None):
        df = Google().search("{0} site:marketwired.com".format(title))
        html = Google().cache(df.link.tolist()[0])
        article = BeautifulSoup(html).find("div",{"class":"mw_release"})
        article = article.text if article else None
        #company_name = BeautifulSoup(html).find("span",{"itemprop":"name"})
        company_name = BeautifulSoup(html).find("strong")
        company_name = company_name.split("SOURCE:")[-1] if company_name else None
        #q.enqueue(ClearSpark()._bulk_company_info, company_name)
        links, website = [], None
        for a in BeautifulSoup(html).find_all("a"):
            if "href" not in a.attrs: continue
            href = a["href"].lower()
            if "http" not in href: continue
            elif "marketwire" in href: continue
            elif "javascript" in href: continue
            elif "linkedin" in href: continue
            elif "twitter" in href: continue
            elif "youtube" in href: continue
            elif "flickr" in href: continue
            elif "facebook" in href: continue
            elif "google" in href: continue
            elif "addthis" in href: continue
            elif "sysomos" in href: continue

            if "target" in a.attrs:
                website = a["href"]
            links.append(href.strip())

        info = {"article": article, "company_name": company_name, 
                "website":website, "links":links}
        return info
Example 8
    def _employees(self, company_name="", keyword=""):
        ''' Linkedin Scrape '''
        # TODO - add linkedin directory search
        args = '-inurl:"/dir/" -inurl:"/find/" -inurl:"/updates"'
        args = args+' -inurl:"job" -inurl:"jobs2" -inurl:"company"'
        qry = '"at {0}" {1} {2} site:linkedin.com'
        qry = qry.format(company_name, args, keyword)
        results = Google().search(qry, 10)
        results = results.dropna()
        results = Google()._google_df_to_linkedin_df(results)
        _name = '(?i){0}'.format(company_name)
        if " " in company_name:
            results['company_score'] = [fuzz.partial_ratio(_name, company) 
                                        for company in results.company]
        else:
            results['company_score'] = [fuzz.ratio(_name, company) 
                                        for company in results.company]
        if keyword != "":
            results['score'] = [fuzz.ratio(keyword, title) 
                                for title in results.title]
            results = results[results.score > 75]

        results = results[results.company_score > 64]
        results = results.drop_duplicates()
        data = {'data': results.to_dict('r'), 'company_name':company_name}
        CompanyExtraInfoCrawl()._persist(data, "employees", "")

        job = rq.get_current_job()
        print job.meta.keys()
        if "queue_name" in job.meta.keys():
          if RQueue()._has_completed(job.meta["queue_name"]):
            q.enqueue(Jigsaw()._upload_csv, job.meta["company_name"])
        return results
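
The `q.enqueue`/`job.meta` choreography assumes a module-level RQ queue. A minimal sketch of that wiring, assuming Redis and RQ are installed (the function and metadata keys are illustrative):

from redis import Redis
from rq import Queue, get_current_job

q = Queue(connection=Redis())

def crawl(domain):
    # a worker can inspect metadata stamped onto its own job
    job = get_current_job()
    print(job.meta.get("queue_name"))

job = q.enqueue(crawl, "example.com", timeout=3600)
job.meta["queue_name"] = "employees-example.com"
job.save()
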
Example 10
    def garble(self, content, time=10):
        print(content)
        print("--------")
        translator = Google()
        self.curr = content
        while (time > 0):
            print("current time is " + str(time))
            self.curr = translator.translate(
                'zh-CN',
                'fr',
                self.curr,
            )
            self.curr = translator.translate(
                'fr',
                'ko',
                self.curr,
            )
            self.curr = translator.translate(
                'ko',
                'zh-CN',
                self.curr,
            )
            time -= 1

        return self.curr
Example 11
 def init(self):
     self.google = Google()
     self.bar = [i for i in u'.' * self.size]
     self.size = float(self.size)
     self.min = float(self.min)
     self.max = float(self.max)
     self.range = self.max - self.min
Example 12
    def _company_blog(self, domain, api_key="", name=""):
        #TODO get blog url
        df = Google().search('inurl:blog site:{0}'.format(domain), 1)
        print df
        if df.empty: return
        df["count"] = [len(url) for url in df.link]
        df = df.reset_index().drop('index',1)
        df = df.drop('title', 1)
        url = df.sort('count').url.ix[0]
        df["timestamp"] = [i.split("...")[0].strip() for i in df.link_span]
        months = list(calendar.month_abbr)
        timestamps = []
        for _date in df.timestamp:
            try:
                num = months.index(_date.split(" ")[0])
            except:
                # relative dates like "2 days ago" carry no month abbreviation
                if "day" in _date and _date.split(" ")[0].isdigit():
                    days = int(_date.split(" ")[0])
                    timestamps.append(
                        arrow.utcnow().replace(days=-days).timestamp)
                else:
                    timestamps.append(0)
                continue
            _date = str(num) + " " + " ".join(_date.split(" ")[1:])
            try:
                timestamps.append(arrow.get(_date, "M D, YYYY").timestamp)
            except:
                timestamps.append(0)
        df["timestamp"] = timestamps

        data = {'data': df.to_dict('r'), 'blog_url':url}
        data["domain"] = domain
        data["api_key"] = api_key
        data["company_name"] = name
        CompanyExtraInfoCrawl()._persist(data, "blog_data", api_key)
Example 13
 def _related(self, domain, api_key="", name=""):
     companies = Google().search("related:{0}".format(domain), 10)
     companies = companies.drop_duplicates()
     companies.columns = ['link', 'description', 'title', 'lol', 'lmao']
     data = {'data': companies.to_dict('r'), "domain": domain,
             "company_name": name}
     data["api_key"] = api_key
     CompanyExtraInfoCrawl()._persist(data, "similar", api_key)
Example 14
    def search_webhook(self, domain, objectId):
        pw = Google().search('"{0}" site:prnewswire.com'.format(domain))
        bw = Google().search('"{0}" site:businesswire.com'.format(domain))
        job_queue_lol = objectId + str(arrow.now().timestamp)

        if not pw.empty:
            for link in pw.link:
                print "PW STARTED", pw.shape, link
                job = q.enqueue(PRNewsWire()._email_webhook,
                                domain,
                                link,
                                job_queue_lol,
                                objectId,
                                timeout=3600)
                job.meta['profile_id1'] = job_queue_lol
                job.save()
        print len(q.jobs)

        if not bw.empty:
            for link in bw.link:
                print "BW STARTED", bw.shape, link
                job = q.enqueue(BusinessWire()._email_webhook,
                                domain,
                                link,
                                job_queue_lol,
                                objectId,
                                timeout=3600)
                job.meta['profile_id1'] = job_queue_lol
                job.save()
        print len(q.jobs)
Example 15
    def _employees(self, company_name="", keyword=None):
        ''' Linkedin Scrape '''
        # TODO - add linkedin directory search
        args = '-inurl:"/dir/" -inurl:"/find/" -inurl:"/updates" -inurl:"/title/" -inurl:"/pulse/"'
        args = args+' -inurl:"job" -inurl:"jobs2" -inurl:"company"'
        qry = '"at {0}" {1} {2} site:linkedin.com'
        qry = qry.format(company_name, args, keyword or "")
        #results = Google().search(qry, 10)
        results = Google().search(qry, 1)
        results = results.dropna()
        results = Google()._google_df_to_linkedin_df(results)
        _name = '(?i){0}'.format(company_name)
        print results.columns
        if results.empty: 
            print "No employees found for", company_name, keyword
            return results

        if " " in company_name:
            results['company_score'] = [fuzz.partial_ratio(_name, company) 
                                        for company in results.company_name]
        else:
            results['company_score'] = [fuzz.ratio(_name, company) 
                                        for company in results.company_name]
        if keyword:
            results['score'] = [fuzz.partial_ratio(keyword, title) 
                                for title in results.title]
            results = results[results.score > 75]
        results = results[results.company_score > 49]
        results = results.drop_duplicates()
        return results
Example 16
def query_routes(sensor='false', mode='transit', **kwargs):
    gmap = Google()
    params = {}
    params["sensor"] = sensor
    params["mode"] = mode
    params.update(kwargs)
    response = gmap.directions(params)
    return response
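
A hypothetical call: anything beyond `sensor` and `mode` is forwarded untouched into the params dict, so the parameter names below are assumptions about what `Google().directions` accepts rather than documented arguments:

# extra keyword arguments land in params unchanged
response = query_routes(mode='driving',
                        origin='Zagreb',       # assumed parameter name
                        destination='Split')   # assumed parameter name
print(response)
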
Example 17
    def streaming_search(self, domain):
        pw = Google().search('"{0}" site:prnewswire.com'.format(domain))
        bw = Google().search('"{0}" site:businesswire.com'.format(domain))

        pn_emails, bw_emails = pd.DataFrame(), pd.DataFrame()
        for link in pw.link:
            pn_emails = PRNewsWire()._find_emails(domain, link, False)
        for link in bw.link:
            bw_emails = BusinessWire()._find_emails(domain, link, False)
        ''' enqueue and return values '''
        return pd.concat([pn_emails, bw_emails]).drop_duplicates('pattern')
Example 18
 def _daily_news(self, domain, api_key="",  name=""):
     df = Google().search("site:linkedin.com/company {0}".format(domain))
     if df.empty: return 
     #for link in df.link:
     link = df.link.tolist()[0]
     print link
     html = Google().cache(link)
     posts = self._company_posts(html)
     #Linkedin()._signal(link, api_key)
     data = {"data":posts, "company_name":name, "domain":domain}
     CompanyExtraInfoCrawl()._persist(data, "linkedin_posts", api_key)
Example 19
 def _company_profile(self, name, api_key=""):
     df = Google().search('site:facebook.com {0}'.format(name))
     if df.empty: return df
     url = df.link.tolist()[0]
     html = Google().cache(url)
     #browser = Browser('phantomjs')
     #browser.visit(url)
     val = self._html_to_dict(html)
     print val
     val["company_name"] = name
     CompanyInfoCrawl()._persist(val, "facebook", api_key)
Example 21
  def _email_search(self, email, api_key=""):
      pattern, person = None, None
      try:
          person = clearbit.Person.find(email=email, stream=True)
      except:
          person = None
      data = {"pattern":None, "name":None, "email":email,
              "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      if person:
          pattern = EmailGuessHelper()._find_email_pattern(person["name"]["fullName"], email)
          if pattern: 
              data = {"pattern":pattern, "name":person["name"]["fullName"], "email":email,
                      "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      elif not person or not pattern:
          person = FullContact()._person_from_email(email)
          print person
          try:
              person = person["contactInfo"]["fullName"]
              fullcontact_person = True
          except:
              fullcontact_person = False

          if fullcontact_person:
              pattern = EmailGuessHelper()._find_email_pattern(person, email)
              data = {"pattern":pattern, "name":person, "email":email,
                      "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
              print pattern
          else:
              _email = email.replace(".", " ").replace("-", " ").replace("_"," ")
              _email = _email.replace("@", " ")
              g = Google().search("{0} site:linkedin.com/pub".format(_email))
              g1 = Google().search("{0} site:linkedin.com/pub".format(_email.split(" ")[0]))
              g2 = Google().search("{0} site:linkedin.com/pub".format(_email.split(" ")[-1]))
              g = pd.concat([g, g1, g2])
              choices = [i.split(" |")[0] for i in g.link_text]
              person = process.extract(_email, choices, limit=1)
              try:
                  person = person[0][0]
              except:
                  pass
              pattern = EmailGuessHelper()._find_email_pattern(person, email)
              print "google search pattern", pattern
              if pattern:
                  data = {"pattern":pattern, "name":person, "email":email,
                          "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
              else:
                  data = {"pattern":None, "name":None, "email":email,
                          "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      #data = pd.DataFrame([data])
      conn = r.connect(host="localhost", port=28015, db="triggeriq")
      r.table('email_pattern_crawls').insert(data).run(conn)
      #CompanyEmailPatternCrawl()._persist(data, "emailhunter", api_key)
      # persist to rethinkdb
      print "person", person
Example 22
 def _news(self, domain, api_key="", company_name="", period=None):
     name = domain.split(".")[0] if company_name == "" else company_name
     if period:
         df = Google().news_search(name, 1, period)
     else:
         df = Google().news_search(name)
     print df
     data = {'data': df.to_dict('r'), 'site_url': domain}
     data["domain"] = domain
     data["api_key"] = api_key
     data["company_name"] = company_name
     CompanyExtraInfoCrawl()._persist(data, "general_news", api_key)
Example 23
 def _daily_news(self, domain, api_key="", name=""):
     df = Google().search('site:facebook.com {0}'.format(domain))
     link = df.link.tolist()[0]
     html = Google().cache(link)
     posts = Facebook()._posts(html)
     posts = pd.DataFrame(posts).fillna("")
     data = {
         "data": posts.to_dict("r"),
         "domain": domain,
         "company_name": name
     }
     CompanyExtraInfoCrawl()._persist(data, "facebook_posts", api_key)
Example 25
    def _directory_search(self, name, description):
        qry = name+' "{0}" site:linkedin.com/pub/dir'.format(description)
        qry = filter(lambda x: x in string.printable, qry)
        results = Google().search(qry)
        count = 0
        while results.empty:
            print "trying again"
            results = Google().search(qry)
            count = count + 1
            if count > 2: break

        print results
        return results.url if not results.empty else []
Example 26
    def __init__(self, connection, output_dir, config):
        self.connection = connection
        self.cursor = connection.cursor()
        self.output_dir = output_dir
        self.config = config

        self.PgRouting = PgRouting(cursor=self.cursor)
        self.Google = Google(config['google_api_key'])
        self.MapQuest = MapQuest(config['mapquest_api_key'])
        self.RoutesProcessor = RoutesProcessor()

        self.time_named_dir = self.create_execution_directory()
        self.run()
Example 27
 def _domain_search(self, domain, api_key="", name=""):
     df = Google().search('site:facebook.com {0}'.format(domain))
     for url in df.link:
         #browser = Browser('phantomjs')
         #browser.visit(url)
         # html = browser.html
         html = Google().cache(url)
         if domain not in BeautifulSoup(html).text: continue
         val = self._html_to_dict(html)
         val["company_name"] = name
         val["domain"] = domain
         CompanyInfoCrawl()._persist(val, "facebook", api_key)
         break
Example 28
    def _news(self, domain, api_key="", company_name=""):
        # TODO - include general info links
        browser = Browser('phantomjs')
        browser.visit('http://google.com')
        browser.find_by_name('q').first.fill(company_name)
        browser.find_by_name('btnG').first.click()
        browser.find_link_by_text('News').first.click()
        url = browser.evaluate_script("document.URL")
        url = url + "&tbs=qdr:m,sbd:1" + "&num=100&filter=0&start=0"
        browser.visit(url)
        pages = pd.DataFrame()
        df = Google()._results_html_to_df(browser.html)

        pages = pages.append(df)
        #print browser.find_by_css('td > a')
        if not browser.find_by_css('td > a'):
            pages = pages.to_dict('r')
            pages = {
                'data': pages,
                'company_name': company_name,
                "domain": domain
            }
            CompanyExtraInfoCrawl()._persist(pages, "general_news", api_key)

        try:
            _next = browser.find_by_css('td > a')[-1].text
        except:
            _next = None
        while _next and "Next" in _next:
            browser.find_by_css('td > a')[-1].click()
            df = Google()._results_html_to_df(browser.html)
            pages = pages.append(df)
            try:
                _next = browser.find_by_css('td > a')[-1].text
            except:
                _next = None

        #pages = pages[~pages.title.str.contains("press release")]
        pages = pages[pages.link_span.str.contains(
            '(?i){0}'.format(company_name))]
        pages.columns = ['link', 'description', 'title', 'info', '']
        pages['date'] = [i.split('-')[-1] for i in pages['info']]
        pages["timestamp"] = [
            Helper()._str__to_timestamp(i) for i in pages.date
        ]
        pages['news_source'] = [i.split('-')[0] for i in pages['info']]
        pages = pages.drop_duplicates()
        del pages[""]
        print pages.columns

        pages = pages.to_dict('r')
        pages = {'data': pages, 'company_name': company_name, "domain": domain}
        CompanyExtraInfoCrawl()._persist(pages, "general_news", api_key)
Example 29
 def _html_to_dict(self, url):
     cb = Google().cache(url)
     cb = BeautifulSoup(cb)
     info = cb.find('div',{'class':'info-card-content'})
     if info:
       info = info.find('div',{'class':'definition-list'})
     else:
       return {}
     vals = [label.text for label in info.find_all('dd')]
     cols = [label.text[:-1].lower() for label in info.find_all('dt')]
     info = dict(zip(cols, vals))
     info['logo'] = "http://crunchbase.com"+cb.find('img')['src']
     info['name'] = cb.find('h1').text
     info['handle'] = url
     return info
Example 30
 def _signal(self, link, api_key=""):
     html = Google().cache(link)
     info = self._html_to_dict(html)
     posts = self._posts(html)
     CompanyInfoCrawl()._persist(info, "facebook", api_key)
     for post in posts:
         CompanyExtraInfoCrawl()._persist(post, "facebook_posts", api_key)
Example 31
 def _reviews(self, domain, api_key="", name=""):
     df = Google().search('site:glassdoor.com/reviews {0}'.format(name))
     if df.empty: return
     url = df.ix[0].link
     r = BeautifulSoup(Crawlera().get(url).text)
     if not r.find("a", {"class": "sortByDate"}): return
     url = "http://glassdoor.com" + r.find("a",
                                           {"class": "sortByDate"})["href"]
     print url
     r = requests.get(
         "http://localhost:8950/render.html?url={0}".format(url))
     r = BeautifulSoup(r.text)
     rating = r.find('div', {'class': 'ratingNum'})
     rating = rating.text if rating else ""
     # TODO - awards
     reviews = pd.DataFrame()
     for review in r.find_all('li', {'class': 'empReview'}):
         pros = review.find('p', {'class': 'pros'})
         cons = review.find('p', {'class': 'cons'})
         extra = review.find('p', {'class': 'notranslate'})
         summary = review.find('span', {'class': 'summary'})
         date = review.find('time', {'class': 'date'})
         vals = [pros, cons, extra, summary, date]
         cols = ["pros", "cons", "extra", "summary", "date"]
         vals = [val.text.strip() if val else "" for val in vals]
         data = dict(zip(cols, vals))
         data["timestamp"] = Helper()._str_to_timestamp(data["date"])
         reviews = reviews.append(data, ignore_index=True)
     return reviews
Example 33
 def _linkedin_profile_from_name(self, company_name):
     qry = company_name+' site:linkedin.com/company'
     google_results = Google().search(qry)
     if google_results.empty: return "not found"
     url = google_results.ix[0].url
     # scrape cache
     return url if "/company/" in url else "not found"
Example 34
 def _html_to_dict(self, url):
     r = BeautifulSoup(Google().cache(url))
     logo = r.find('div', {'class': 'logo'})
     if logo:
         logo = logo.find('img')
         logo = logo['src'] if logo else ""
     else:
         logo = ""
     #website = r.find('span',{'class':'hideHH'}).text
     info = r.find('div', {'id': 'EmpBasicInfo'})
     if info:
         info = info.find_all('div', {'class': 'empInfo'})
     else:
         return {}
     info = dict([[
         i.find('strong').text.lower().strip(),
         i.find('span').text.strip()
     ] for i in info])
     info['name'] = r.find('div', {'class': 'header'}).find('h1').text
     info['description'] = r.find('p', {'id': 'EmpDescription'})
     info['description'] = info['description'].text if info[
         'description'] else ""
     info['logo'] = logo
     info['handle'] = url
     return info
Example 35
 def _signal(self, link, api_key=""):
     html = Google().cache(link)
     info = self._html_to_dict(html)
     tweets = self._tweets(html)
     CompanyInfoCrawl()._persist(info, "twitter", api_key)
     for tweet in tweets:
         CompanyExtraInfoCrawl()._persist(tweet, "tweets", api_key)
Example 36
class Main(Module):

    pattern = re.compile(r'^\s*(sunrise|sunset)(?:\s+(@?)(.+?))?\s*$', re.I)
    help = '(sunrise|sunset) [location|@nick] - get time of sun rise/set'
    error = u"That place doesn't have a sun, sorry"

    def init(self):
        self.colorlib = self.madcow.colorlib
        try:
            self.learn = Learn(madcow=madcow)
        except:
            self.learn = None
        self.google = Google()

    def response(self, nick, args, kwargs):
        query, args = args[0], args[1:]
        if not args[1]:
            args = 1, nick
        if args[0]:
            location = self.learn.lookup('location', args[1])
            if not location:
                return u'%s: Try: set location <nick> <location>' % nick
        else:
            location = args[1]
        response = self.google.sunrise_sunset(query, location)
        return u'%s: %s' % (nick, response)
Example 39
    def _press_search(self, domain, api_key):
        pw = Google().search('"{0}" site:prnewswire.com'.format(domain))
        bw = Google().search('"{0}" site:businesswire.com'.format(domain))
        #job_queue_lol = objectId+str(arrow.now().timestamp)
        print bw, pw
        pw = pw if not pw.empty else pd.DataFrame(columns=["link"])
        bw = bw if not bw.empty else pd.DataFrame(columns=["link"])
        queue = "press-check-" + domain
        for link in pw.link:
            job = q.enqueue(PRNewsWire()._email, domain, link, timeout=3600)
            RQueue()._meta(job, "{0}_{1}".format(domain, api_key))

        for link in bw.link:
            job = q.enqueue(BusinessWire()._email, domain, link, timeout=3600)
            RQueue()._meta(job, "{0}_{1}".format(domain, api_key))
Example 40
 def _search(self, company_name, api_key=""):
     qry = 'site:linkedin.com inurl:"at-{0}" inurl:title -inurl:job'
     # TODO - also strip [".", "'", ","] from the name
     name = company_name.strip().lower().replace(" ", "-")
     dirs = Google().search(qry.format(name), 1)
     for url in dirs.url:
         q.enqueue(LinkedinTitleDir().parse, url, company_name)
Example 41
def main():

    #fetch database credentials from env variables
    db_name = os.environ["ITIME_DB"]
    db_user = os.environ["ITIME_DB_USER"]
    db_password = os.environ["ITIME_DB_PASSWORD"]

    db = Database(db_name, db_user, db_password)
    connection_tries = 0

    #try to establish db connection, quit if it fails
    while (not db.connect()):
        print("Trying to reconnect to db,try starting postgres")
        time.sleep(5)
        if (connection_tries > 1):
            sys.exit(0)
        connection_tries += 1

    #fetch google client secret file path
    google_api_file = os.environ["ITIME_GOOGLE_API_FILE"]
    google = Google(google_api_file)

    controller = Controller(db, google)

    #Config for rabbitmq
    rabbit_server = os.environ["ITIME_RABBIT_SERVER"]
    rabbit_queue = os.environ["ITIME_RABBIT_US_QUEUE"]

    rabbit = AmqpServer(rabbit_server, rabbit_queue, controller.incoming)
    rabbit.start()
    print("Exiting...")
Example 42
    def _recent_webpages_published(self, domain, api_key="", name="", period=None):
        if period:
            df = Google().search("site:{0}".format(domain), 1, "d")
            df2 = Google().search("{0}".format(name), 1, "d")
        else:
            df = Google().search("site:{0}".format(domain))
            df2 = Google().search("{0}".format(name))

        # TODO - add timestamps 
        # TODO - queue scrapes
        df = pd.concat([df, df2])

        data = {'data': df.to_dict('r'), 'site_url':domain}
        data["domain"] = domain
        data["api_key"] = api_key
        data["company_name"] = name
        CompanyExtraInfoCrawl()._persist(data, "recent_webpages", api_key)
Example 43
class Main(Module):

    pattern = re.compile(r"^\s*(care|dongs|boner)\s+(.+?)\s*$", re.I)
    help = "\n".join([u"care <#> - display a care-o-meter", u"dongs <#> - like care, but more penile"])
    error = u"invalid care factor"
    isnum = re.compile(r"^\s*[0-9.]+\s*$")
    sep = re.compile(r"\s*=\s*")
    numsep = re.compile(r"(\d)\s+(\d)")
    title = u"CARE-O-METER"

    # settings
    size = 40
    min = 0
    max = 100
    maxboner = 3 * 400

    def init(self):
        self.google = Google()
        self.bar = [i for i in u"." * self.size]
        self.size = float(self.size)
        self.min = float(self.min)
        self.max = float(self.max)
        self.range = self.max - self.min

    def response(self, nick, args, kwargs):
        command, val = args
        iscare = command == "care"
        if not self.isnum.search(val):
            # try google calculator if not a number
            try:
                val = self.google.calculator(val)
                val = self.numsep.sub(r"\1\2", val)
                val = self.sep.split(val)[1]
                val = val.split()[0]
            except:
                return u"%s: what is this i don't even"
        val = float(val)

        # sanity check value
        if val < self.min:
            val = self.min
        elif val > self.max:
            if iscare:
                val = self.max
            else:
                val = self.maxboner

        pos = int(round((self.size - 1) * ((val - self.min) / self.range)))
        if command == "care":
            bar = list(self.bar)
            bar[pos] = u"o"
            bar = u"".join(bar)
            bar = u"|" + bar + u"|"
            bar = u"%s: %s" % (self.title, bar)
        else:
            bar = u"8" + "=" * pos + "e"
        return bar
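
The bar position is plain linear interpolation over the configured range. With the defaults (size=40, min=0, max=100), a value of 50 lands at index `int(round(39 * 0.5)) = 20` of the 40-dot bar:

size, lo, hi = 40.0, 0.0, 100.0
val = 50.0
pos = int(round((size - 1) * ((val - lo) / (hi - lo))))
print(pos)  # 20
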
Example 44
def query():
    job = request.args['job']
    
    lat = request.args['lat']
    lng = request.args['lng']
    city = get_city(lat, lng)
    print city
    
    rs = Google.search(urllib2.quote((u'%s %s 招聘' % (city, job,)).encode('utf-8')), append_url = '&tbs=qdr:w')
    return json.dumps({'result': [{"name": x.name, "email": x.email, "link":x.link} for x in rs]})
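
`query` reads `request.args`, so it is evidently a Flask view whose route decorator was trimmed off. A minimal sketch of the assumed wiring (the endpoint path is illustrative):

from flask import Flask, request
import json

app = Flask(__name__)

@app.route('/query')               # assumed endpoint path
def query():
    job = request.args['job']      # e.g. /query?job=engineer&lat=..&lng=..
    return json.dumps({'job': job})
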
Example 45
 def __init__(self, db):
     self.dic_wind = {}
     self.dic_button = {}
     self.dic_err_win = {}
     self.layout = None
     self.mygroupbox = QGroupBox('Contacts list')
     self.myform = QFormLayout()
     self.scroll = QScrollArea()
     self.db = db
     self.google_api = Google()
     super().__init__()
Example 46
    def _google_span_search(self, domain):
      queue = "google-span-"+domain
      qry_1 = '("media contact" OR "media contacts" OR "press release") "@{0}"'
      qry_1 = qry_1.format(domain)
      qry_2 = '"email * * {0}"'.format(domain)
      first = Google().ec2_search(qry_1)
      second = Google().ec2_search(qry_2)

      if not first.empty:
          first = first[first.link_span.str.contains('@')]
          q.enqueue(Sources()._google_cache_search, domain, first.link)
      if not second.empty:
          second = second[second.link_span.str.contains('@')]
          q.enqueue(Sources()._google_cache_search, domain, second.link)

      emails = [[email for email in span.split() if "@" in email] 
                for span in first.append(second).link_span]
      emails = pd.Series(emails).sum()
      emails = self._research_emails(emails)
      CompanyEmailPatternCrawl()._persist(emails, "google_span_search")
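
`pd.Series(emails).sum()` leans on the fact that summing a Series of lists concatenates them, flattening the per-span lists of addresses into one. A quick demonstration:

import pandas as pd

spans = ["press contact: press@acme.com or pr@acme.com",
         "email us at info@acme.com"]
emails = [[w for w in span.split() if "@" in w] for span in spans]
print(pd.Series(emails).sum())
# ['press@acme.com', 'pr@acme.com', 'info@acme.com']
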
Example 47
    def _company_blog(self, domain, period=None):
        #TODO get blog url
        if period:
            df = Google().search('inurl:blog site:{0}'.format(domain), 1, "d")
        else:
            df = Google().search('inurl:blog site:{0}'.format(domain), 1)

        if df.empty: return
        df["count"] = [len(url) for url in df.link]
        df = df.reset_index().drop('index',1)
        df = df.drop('title', 1)
        url = df.sort('count').url.ix[0]
        df["timestamp"] = [i.split("...")[0].strip() for i in df.link_span]
        months = list(calendar.month_abbr)
        timestamps = []
        for _date in df.timestamp:
            try:
                num = months.index(_date.split(" ")[0])
            except:
                # relative dates like "2 days ago" carry no month abbreviation
                if "day" in _date and _date.split(" ")[0].isdigit():
                    days = int(_date.split(" ")[0])
                    timestamps.append(
                        arrow.utcnow().replace(days=-days).timestamp)
                else:
                    timestamps.append(0)
                continue
            _date = str(num) + " " + " ".join(_date.split(" ")[1:])
            try:
                timestamps.append(arrow.get(_date, "M D, YYYY").timestamp)
            except:
                timestamps.append(0)
        df["timestamp"] = timestamps
        data = df
        print data
        data["domain"] = domain
        data["event_type"] = "CompanyBlogEvent"
        data = data.applymap(lambda x: self._remove_non_ascii(x))
        data["event_key"] = ["".join(map(str, _data.values()))[:124]
                             for _data in data.to_dict("r")]
        data = [row.dropna().to_dict() for i, row in data.iterrows()]
        r.table("events").insert(data).run(conn)
        return data
Example 48
 def _company_profile(self, name, api_key=""):
     df = Google().search('site:indeed.com/cmp {0}'.format(name))
     if df.empty: 
         return CompanyInfoCrawl()._persist({'company_name': name}, 
                                            "indeed", api_key)
     df['_name'] = [i.split("Careers and Employment")[0].strip() 
                    for i in df.link_text]
     df["score"] = [fuzz.ratio(b, name) for b in df._name]
     df = df[df.score > 70]
     df = df.sort('score', ascending=False)
     df = df.reset_index().drop('index', 1)
     if df.empty:
         return CompanyInfoCrawl()._persist({'company_name': name},
                                            "indeed", api_key)
     url = df.ix[0].link
     val = self._html_to_dict(url)
     print "name"
     val["handle"] = url
     val['company_name'] = name
     print val
     CompanyInfoCrawl()._persist(val, "indeed", api_key)
Example 49
File: calc.py  Project: Havvy/madcow
class Main(Module):

    pattern = re.compile(u'^\s*calc\s+(.+)', re.I)
    require_addressing = True
    help = u'calc <expression> - pass expression to google calculator'
    error = 'No results (bad syntax?)'

    def init(self):
        self.google = Google()

    def response(self, nick, args, kwargs):
        return u'%s: %s' % (nick, self.google.calculator(args[0]))
Example 51
    def _parse_response(self, html, company_name, keyword=None):
        results = Google()._results_html_to_df(html)
        results = results.dropna()
        results = Google()._google_df_to_linkedin_df(results)
        _name = '(?i){0}'.format(company_name)
        print results.columns
        if results.empty: 
            print "No employees found for", company_name, keyword
            return results

        if " " in company_name:
            results['company_score'] = [fuzz.partial_ratio(_name, company) 
                                        for company in results.company_name]
        else:
            results['company_score'] = [fuzz.ratio(_name, company) 
                                        for company in results.company_name]
        if keyword:
            results['score'] = [fuzz.partial_ratio(keyword, title) 
                                for title in results.title]
            results = results[results.score > 75]
        results = results[results.company_score > 49]
        results = results.drop_duplicates()
        return results
Example 52
    def _persist(self, res):
        url, body = res.effective_url, res.body
        if "google" in url:
            df = Google()._results_html_to_df(body)
        elif "yandex" in url:
            df = Yandex()._html_to_df(body)
        elif "duckduckgo" in url:
            df = DuckDuckGo()._html_to_df(body)
        elif "bing" in url:
            df = Bing()._html_to_df(body)

        df["engine_url"] = url
        u = urllib.unquote_plus(url)
        df["qry"] = urllib.unquote_plus(u.split("url=")[-1].split("=")[-1])
        df["search_engine"] = urlparse.urlparse(u.split("url=")[-1]).netloc
        data = {
          "engine_url": url,
          "qry": urllib.unquote_plus(u.split("url=")[-1].split("=")[-1]),
          "search_engine":urlparse.urlparse(u.split("url=")[-1]).netloc,
          "res":df.to_dict("r")[:15]
        }
        #TODO persist to rethinkdb
        return data
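
The query is recovered by percent-decoding the engine URL and peeling off its final parameter. A small demonstration of the two standard-library calls involved (Python 2 module names, matching the snippet; in Python 3 both live in `urllib.parse`):

import urllib
import urlparse

u = urllib.unquote_plus("https://www.google.com/search?q=acme+corp")
print(u)                            # https://www.google.com/search?q=acme corp
print(urlparse.urlparse(u).netloc)  # www.google.com
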
Example 53
    def _press_releases(self, qry, company_domain=None, period=None):
        queries = ['"{0}" site:prnewswire.com'.format(qry),
                   '"{0}" site:businesswire.com'.format(qry),
                   '"{0}" site:marketwired.com'.format(qry),
                   '"{0}" site:newswire.ca'.format(qry),
                   '"{0}" site:reuters.com'.format(qry)]

        p = Google()._multi_get(queries)
        try:
            p = p.drop_duplicates()
        except:
            pass
        #p['date'] = [span.split('Business Wire')[-1].split('...')[0].strip() for span in p.link_span]
        p['description'] = ["".join(span.split('...')[1:]).strip() for span in p.link_span]
        p["domain"] = company_domain
        p['date'] = [span.split('...')[0].strip() for span in p.link_span]
        p["timestamp"] = [Helper()._str_to_timestamp(i) for i in p.date]
        p['title'] = p['link_text']

        p = p.drop('link_text', 1)
        p = p.drop('url', 1)
        p = p.drop('link_span', 1)
        #for i in p.timestamp: print i
        data = p
        data["domain"] = company_domain
        data["domain"] = company_domain
        data["event_type"] = "CompanyPressEvent"
        data = data.applymap(lambda x: self._remove_non_ascii(x))
        data["event_key"] = ["".join(map(str, _data.values()))[:124]
                             for _data in data.to_dict("r")]
        _df = data.to_dict("r")
        for i in _df:
            for key in i.keys():
                if i[key] == None: del i[key]
        data = [row.dropna().to_dict() for i, row in data.iterrows()]
        r.table("events").insert(data).run(conn)
        return data
Example 54
class Main(Module):

    pattern = re.compile(r'^\s*(?:clock|time)(?:\s*[:-]\s*|\s+)(.+?)\s*$', re.I)
    help = u'time <location> - ask google what time it is somewhere'
    in_re = re.compile(r'^\s*in\s+', re.I)

    def init(self):
        self.google = Google()

    def response(self, nick, args, kwargs):
        query = args[0]
        query = self.in_re.sub('', query)
        result = self.google.clock(query)
        if result:
            return u'%s: %s' % (nick, result)
        else:
            return u"%s: They don't do the whole time thing in \"%s\"" % (nick, query)
Example 55
class Contacts(QWidget):
    def __init__(self, db):
        self.dic_wind = {}
        self.dic_button = {}
        self.dic_err_win = {}
        self.layout = None
        self.mygroupbox = QGroupBox('Contacts list')
        self.myform = QFormLayout()
        self.scroll = QScrollArea()
        self.db = db
        self.google_api = Google()
        super().__init__()

    def init_ui(self):
        sender = self.sender()
        error = None
        if sender is not None:
            if sender.text() == '&VK':
                if not self.db.db_exists():
                    error = self.db.create(download_data(sender.text()))
                else:
                    error = self.db.update_database(
                        download_data(sender.text()))
            if sender.text() == '&Facebook':
                if not self.db.db_exists():
                    error = self.db.create(download_data(sender.text()))
                else:
                    error = self.db.update_database(
                        download_data(sender.text()))

        if error is not None:
            self.dic_err_win[error] = ErrorWindow(error)
            self.dic_err_win[error].init_ui()
        else:
            for friend in self.db.get_list_users():
                button = self.create_button(friend)
                self.myform.addRow(button)

            self.mygroupbox.setLayout(self.myform)

            self.scroll.setWidget(self.mygroupbox)
            self.scroll.setWidgetResizable(True)
            self.scroll.setFixedHeight(600)
            self.layout = QVBoxLayout(self)
            self.layout.addWidget(self.scroll)

    def create_button(self, friend):
        name = friend
        button = QPushButton(name, self)
        self.dic_button[name] = button
        inf_friend = self.db.get_user_inf(friend)
        # default to None so Window() never sees an unbound name
        logo = inf_friend['picture'] if inf_friend['picture'] != '' else None

        self.dic_wind[name] = Window(name, logo,
                                     inf_friend, self, self.google_api)
        button.clicked.connect(lambda: self.dic_wind[name].init_ui())
        return button

    def import_all_contacts(self):
        list_users = self.db.get_list_users()
        if len(list_users) != 0:
            open_new('https://contacts.google.com')
            for contact in list_users:
                contact_data = self.db.get_user_inf(contact)
                self.google_api.create_contact(
                    self.google_api.create_xml(contact_data))

    def redrawing(self):
        self.clear_window()
        self.init_ui()

    def clear_layout(self, layout):
        for i in range(layout.count()):
            if layout.itemAt(i) is not None:
                layout.itemAt(i).widget().setParent(None)

    def clear_window(self):
        for i in range(self.layout.count()):
            if self.layout.itemAt(i) is not None:
                if self.layout.itemAt(i).layout() is not None:
                    self.clear_layout(self.layout.itemAt(i).layout())
                    self.layout.itemAt(i).layout().setParent(None)
                if self.layout.itemAt(i).widget() is not None:
                    self.layout.itemAt(i).widget().setParent(None)
Example 56
class Main(object):
    """Main class for application.

    :arg connection: psycopg2 connection for database
    :type connection: psycopg2._psycopg.connection

    :arg output_dir: directory for saving output files
    :type output_dir: string

    """

    def __init__(self, connection, output_dir, config):
        self.connection = connection
        self.cursor = connection.cursor()
        self.output_dir = output_dir
        self.config = config

        self.PgRouting = PgRouting(cursor=self.cursor)
        self.Google = Google(config['google_api_key'])
        self.MapQuest = MapQuest(config['mapquest_api_key'])
        self.RoutesProcessor = RoutesProcessor()

        self.time_named_dir = self.create_execution_directory()
        self.run()

    def run(self):
        """Main application function. Reads input file with locations for
        routing, executes method that creates output directory for routes,
        executes functions for getting routes, executes functions for
        processing routes.
        """
        # Open and read input file with locations. File content looks like
        # [{"start": {"x": 15.5, "y": 45.5},"end": {"x": 16.5, "y": 43.5}}].
        with open('locations.txt', 'r') as locations_file:
            locations_file_content = locations_file.read()

        # Convert string to json
        locations_list = json.loads(locations_file_content)

        # For each location pair (start-end)...
        for route_number in range(len(locations_list)):
            foldername = self.create_route_directory(route_number)

            start_coords = locations_list[route_number]['start']
            end_coords = locations_list[route_number]['end']

            # String with starting coordinates for route, e.g. '45.5,15.5'
            start_coords_string = (
                str(start_coords['y']) + ', ' + str(start_coords['x'])
            )

            # String with ending coordinates for route, e.g. '43.5,16.5'
            end_coords_string = (
                str(end_coords['y']) + ', ' + str(end_coords['x'])
            )

            pgrouting_data = (
                self.PgRouting.get_route_data(
                    start_coords=start_coords,
                    end_coords=end_coords,
                )
            )

            mapquest_data = (
                self.MapQuest.get_route_data(
                    start_coords=start_coords_string,
                    end_coords=end_coords_string,
                )
            )

            google_data = (
                self.Google.get_route_data(
                    start_coords=start_coords_string,
                    end_coords=end_coords_string,
                )
            )

            self.RoutesProcessor.process_geometry(
                pgrouting_data=pgrouting_data,
                mapquest_data=mapquest_data,
                google_data=google_data,
                route_number=route_number,
                foldername=foldername
            )
            self.RoutesProcessor.process_attributes(
                pgrouting_data=pgrouting_data,
                mapquest_data=mapquest_data,
                google_data=google_data,
                route_number=route_number,
                foldername=foldername
            )

        # Close DB connection.
        self.cursor.close()
        self.connection.close()

    def create_route_directory(self, route_number):
        """Creates directory for specific route.

        :arg route_number: ordinal of start-end location pair in file
        :type route_number: integer

        :returns: name of created directory
        :rtype: string

        """
        # Create directory named by route number.
        route_directory = (
            self.time_named_dir +
            '/route_' + str(route_number)
        )
        os.mkdir(route_directory)

        return route_directory

    def create_execution_directory(self):
        """Creates directory for current execution. Directory is named by
        current time.

        :returns: name of created directory
        :rtype: string

        """
        datetime_now = datetime.datetime.now()

        # Create directory named by current time.
        time_named_dir = (
            self.output_dir +
            '/date_' + str(datetime_now.day) + '_' + str(datetime_now.month) +
            '_' + str(datetime_now.year) + '_' + str(datetime_now.hour) + '_' +
            str(datetime_now.minute) + '_' + str(datetime_now.second)
        )
        os.mkdir(time_named_dir)

        return time_named_dir
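
Because `__init__` calls `self.run()` directly, instantiating the class executes the whole pipeline. A minimal sketch of the call, assuming a psycopg2 connection as the docstring describes (key values are placeholders):

import psycopg2

config = {
    'google_api_key': 'GOOGLE-KEY',       # placeholder
    'mapquest_api_key': 'MAPQUEST-KEY',   # placeholder
}
connection = psycopg2.connect(dbname='routing', user='postgres')
Main(connection=connection, output_dir='output', config=config)
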
Example 57
 def test_google_scrape_search_result(self):
     with open('ipgoogle') as fp:
         google_search_result = Google.scrape_search_result(BeautifulSoup(fp, 'html.parser'))
         self.assertEqual(GOOGLE_SEARCH_RESULT, google_search_result)
Example 58
 def test_google_scrape_news_result(self):
     with open('ipgooglenews') as fp:
         google_news_result = Google.scrape_news_result(BeautifulSoup(fp, 'html.parser'))
         self.assertEqual(GOOGLE_NEWS_RESULT, google_news_result)
Example 59
    def __init__(self, server, port, nick, password, channel_key, permissions_key, orders_csv, help_html, gmail_user, gmail_password):
        # Store the configuration data in instance variables
        self.server = server
        self.port = port
        self.nick = nick
        self.password = password

        self.help_html = help_html

        # Object in charge of connecting to Google; strict OOP is bent here in order to encapsulate aggressively
        self.google = Google(channel_key, permissions_key, orders_csv, gmail_user, gmail_password)

        self.channels = self.google.get_channels(True)
        # Data format: [(#channel, password)]
        self.permissions = self.google.get_permissions(True)
        # Data format: {'~': [], '&': [], '@': [], '%': [], '+': []}
        self.orders = self.google.get_orders(True)
        # Data format: {'region_name': {'id': 123, 'd1': 'prioridad', 'd2': 'prioridad', 'd3': 'prioridad', 'd4': 'prioridad', 'side': 'Rumania'}}
        self.channels_events = self.google.get_channels_evento()
        # Data format: ['#canal1', '#canal2']
        self.ingame_mu = ['2206', '2496', '2401', '2807', '1845', '665', '371', '255', '561', '4396', '4298', '398', '2105', '229', '261', '1011', '1550', '414', '3043', '2564', '2552', '4502', '1310', '542', '3199']
        # Data format: ['2206', '229']

        # Connection flag, used by the initiating main thread to reconnect
        self.running = False
        # Reconnection flag, used by the initiating main thread to prevent reconnection
        self.dont_reconnect = False

        # Dictionary used to interpret IRC numeric reply codes
        self.sym_to_num = {
            "RPL_WELCOME": '001',
            "RPL_YOURHOST": '002',
            "RPL_CREATED": '003',
            "RPL_MYINFO": '004',
            "RPL_ISUPPORT": '005',
            "RPL_BOUNCE": '010',
            "RPL_USERHOST": '302',
            "RPL_ISON": '303',
            "RPL_AWAY": '301',
            "RPL_UNAWAY": '305',
            "RPL_NOWAWAY": '306',
            "RPL_WHOISUSER": '******',
            "RPL_WHOISSERVER": '312',
            "RPL_WHOISOPERATOR": '313',
            "RPL_WHOISIDLE": '317',
            "RPL_ENDOFWHOIS": '318',
            "RPL_WHOISCHANNELS": '319',
            "RPL_WHOWASUSER": '******',
            "RPL_ENDOFWHOWAS": '369',
            "RPL_LISTSTART": '321',
            "RPL_LIST": '322',
            "RPL_LISTEND": '323',
            "RPL_UNIQOPIS": '325',
            "RPL_CHANNELMODEIS": '324',
            "RPL_NOTOPIC": '331',
            "RPL_TOPIC": '332',
            "RPL_INVITING": '341',
            "RPL_SUMMONING": '342',
            "RPL_INVITELIST": '346',
            "RPL_ENDOFINVITELIST": '347',
            "RPL_EXCEPTLIST": '348',
            "RPL_ENDOFEXCEPTLIST": '349',
            "RPL_VERSION": '351',
            "RPL_WHOREPLY": '352',
            "RPL_ENDOFWHO": '315',
            "RPL_NAMREPLY": '353',
            "RPL_ENDOFNAMES": '366',
            "RPL_LINKS": '364',
            "RPL_ENDOFLINKS": '365',
            "RPL_BANLIST": '367',
            "RPL_ENDOFBANLIST": '368',
            "RPL_INFO": '371',
            "RPL_ENDOFINFO": '374',
            "RPL_MOTDSTART": '375',
            "RPL_MOTD": '372',
            "RPL_ENDOFMOTD": '376',
            "RPL_YOUREOPER": '381',
            "RPL_REHASHING": '382',
            "RPL_YOURESERVICE": '383',
            "RPL_TIME": '391',
            "RPL_USERSSTART": '392',
            "RPL_USERS": '393',
            "RPL_ENDOFUSERS": '394',
            "RPL_NOUSERS": '395',
            "RPL_TRACELINK": '200',
            "RPL_TRACECONNECTING": '201',
            "RPL_TRACEHANDSHAKE": '202',
            "RPL_TRACEUNKNOWN": '203',
            "RPL_TRACEOPERATOR": '204',
            "RPL_TRACEUSER": '******',
            "RPL_TRACESERVER": '206',
            "RPL_TRACESERVICE": '207',
            "RPL_TRACENEWTYPE": '208',
            "RPL_TRACECLASS": '209',
            "RPL_TRACERECONNECT": '210',
            "RPL_TRACELOG": '261',
            "RPL_TRACEEND": '262',
            "RPL_STATSLINKINFO": '211',
            "RPL_STATSCOMMANDS": '212',
            "RPL_ENDOFSTATS": '219',
            "RPL_STATSUPTIME": '242',
            "RPL_STATSOLINE": '243',
            "RPL_UMODEIS": '221',
            "RPL_SERVLIST": '234',
            "RPL_SERVLISTEND": '235',
            "RPL_LUSERCLIENT": '251',
            "RPL_LUSEROP": '252',
            "RPL_LUSERUNKNOWN": '253',
            "RPL_LUSERCHANNELS": '254',
            "RPL_LUSERME": '255',
            "RPL_ADMINME": '256',
            "RPL_ADMINLOC": '257',
            "RPL_ADMINLOC": '258',
            "RPL_ADMINEMAIL": '259',
            "RPL_TRYAGAIN": '263',
            "ERR_NOSUCHNICK": '401',
            "ERR_NOSUCHSERVER": '402',
            "ERR_NOSUCHCHANNEL": '403',
            "ERR_CANNOTSENDTOCHAN": '404',
            "ERR_TOOMANYCHANNELS": '405',
            "ERR_WASNOSUCHNICK": '406',
            "ERR_TOOMANYTARGETS": '407',
            "ERR_NOSUCHSERVICE": '408',
            "ERR_NOORIGIN": '409',
            "ERR_NORECIPIENT": '411',
            "ERR_NOTEXTTOSEND": '412',
            "ERR_NOTOPLEVEL": '413',
            "ERR_WILDTOPLEVEL": '414',
            "ERR_BADMASK": '415',
            "ERR_UNKNOWNCOMMAND": '421',
            "ERR_NOMOTD": '422',
            "ERR_NOADMININFO": '423',
            "ERR_FILEERROR": '424',
            "ERR_NONICKNAMEGIVEN": '431',
            "ERR_ERRONEUSNICKNAME": '432',
            "ERR_NICKNAMEINUSE": '433',
            "ERR_NICKCOLLISION": '436',
            "ERR_UNAVAILRESOURCE": '437',
            "ERR_USERNOTINCHANNEL": '441',
            "ERR_NOTONCHANNEL": '442',
            "ERR_USERONCHANNEL": '443',
            "ERR_NOLOGIN": '******',
            "ERR_SUMMONDISABLED": '445',
            "ERR_USERSDISABLED": '446',
            "ERR_NOTREGISTERED": '451',
            "ERR_NEEDMOREPARAMS": '461',
            "ERR_ALREADYREGISTRED": '462',
            "ERR_NOPERMFORHOST": '463',
            "ERR_PASSWDMISMATCH": '464',
            "ERR_YOUREBANNEDCREEP": '465',
            "ERR_YOUWILLBEBANNED": '466',
            "ERR_KEYSET": '467',
            "ERR_CHANNELISFULL": '471',
            "ERR_UNKNOWNMODE": '472',
            "ERR_INVITEONLYCHAN": '473',
            "ERR_BANNEDFROMCHAN": '474',
            "ERR_BADCHANNELKEY": '475',
            "ERR_BADCHANMASK": '476',
            "ERR_NOCHANMODES": '477',
            "ERR_BANLISTFULL": '478',
            "ERR_NOPRIVILEGES": '481',
            "ERR_CHANOPRIVSNEEDED": '482',
            "ERR_CANTKILLSERVER": '483',
            "ERR_RESTRICTED": '484',
            "ERR_UNIQOPPRIVSNEEDED": '485',
            "ERR_NOOPERHOST": '491',
            "ERR_NOSERVICEHOST": '492',
            "ERR_UMODEUNKNOWNFLAG": '501',
            "ERR_USERSDONTMATCH": '502',
            "NOTICE": 'NOTICE',
        }

        # Diccionario de codigos de IRC invertido ¿Innecesario? proximamente sera removido
        self.num_to_sym = {}
        for k, v in self.sym_to_num.items():
            self.num_to_sym[v] = k
Ejemplo n.º 60
0
import socket
import string
import sys
import threading
import time
from collections import deque

# Google (the spreadsheet client) and FEAHandler (the command handler) are
# project-local classes assumed to be importable alongside this module.

class FEABot():
    # Constructor: basic configuration setup; instantiates helper objects
    def __init__(self, server, port, nick, password, channel_key, permissions_key, orders_csv, help_html, gmail_user, gmail_password):
        # Store the configuration data in instance variables
        self.server = server
        self.port = port
        self.nick = nick
        self.password = password

        self.help_html = help_html

        # Object in charge of talking to Google; strict OOP is bent here for the sake of tight encapsulation
        self.google = Google(channel_key, permissions_key, orders_csv, gmail_user, gmail_password)

        self.channels = self.google.get_channels(True)
        # Data format: [(#channel, password)]
        self.permissions = self.google.get_permissions(True)
        # Data format: {'~': [], '&': [], '@': [], '%': [], '+': []}
        self.orders = self.google.get_orders(True)
        # Data format: {'region_name': {'id': 123, 'd1': 'prioridad', 'd2': 'prioridad', 'd3': 'prioridad', 'd4': 'prioridad', 'side': 'Rumania'}}
        self.channels_events = self.google.get_channels_evento()
        # Data format: ['#canal1', '#canal2']
        self.ingame_mu = ['2206', '2496', '2401', '2807', '1845', '665', '371', '255', '561', '4396', '4298', '398', '2105', '229', '261', '1011', '1550', '414', '3043', '2564', '2552', '4502', '1310', '542', '3199']
        # Data format: ['2206', '229']

        # Connection flag, used by the launching main thread to reconnect
        self.running = False
        # Reconnection flag, used by the launching main thread to prevent reconnection
        self.dont_reconnect = False

        # Dictionary used to interpret IRC numeric reply codes
        self.sym_to_num = {
            "RPL_WELCOME": '001',
            "RPL_YOURHOST": '002',
            "RPL_CREATED": '003',
            "RPL_MYINFO": '004',
            "RPL_ISUPPORT": '005',
            "RPL_BOUNCE": '010',
            "RPL_USERHOST": '302',
            "RPL_ISON": '303',
            "RPL_AWAY": '301',
            "RPL_UNAWAY": '305',
            "RPL_NOWAWAY": '306',
            "RPL_WHOISUSER": '******',
            "RPL_WHOISSERVER": '312',
            "RPL_WHOISOPERATOR": '313',
            "RPL_WHOISIDLE": '317',
            "RPL_ENDOFWHOIS": '318',
            "RPL_WHOISCHANNELS": '319',
            "RPL_WHOWASUSER": '******',
            "RPL_ENDOFWHOWAS": '369',
            "RPL_LISTSTART": '321',
            "RPL_LIST": '322',
            "RPL_LISTEND": '323',
            "RPL_UNIQOPIS": '325',
            "RPL_CHANNELMODEIS": '324',
            "RPL_NOTOPIC": '331',
            "RPL_TOPIC": '332',
            "RPL_INVITING": '341',
            "RPL_SUMMONING": '342',
            "RPL_INVITELIST": '346',
            "RPL_ENDOFINVITELIST": '347',
            "RPL_EXCEPTLIST": '348',
            "RPL_ENDOFEXCEPTLIST": '349',
            "RPL_VERSION": '351',
            "RPL_WHOREPLY": '352',
            "RPL_ENDOFWHO": '315',
            "RPL_NAMREPLY": '353',
            "RPL_ENDOFNAMES": '366',
            "RPL_LINKS": '364',
            "RPL_ENDOFLINKS": '365',
            "RPL_BANLIST": '367',
            "RPL_ENDOFBANLIST": '368',
            "RPL_INFO": '371',
            "RPL_ENDOFINFO": '374',
            "RPL_MOTDSTART": '375',
            "RPL_MOTD": '372',
            "RPL_ENDOFMOTD": '376',
            "RPL_YOUREOPER": '381',
            "RPL_REHASHING": '382',
            "RPL_YOURESERVICE": '383',
            "RPL_TIME": '391',
            "RPL_USERSSTART": '392',
            "RPL_USERS": '393',
            "RPL_ENDOFUSERS": '394',
            "RPL_NOUSERS": '395',
            "RPL_TRACELINK": '200',
            "RPL_TRACECONNECTING": '201',
            "RPL_TRACEHANDSHAKE": '202',
            "RPL_TRACEUNKNOWN": '203',
            "RPL_TRACEOPERATOR": '204',
            "RPL_TRACEUSER": '******',
            "RPL_TRACESERVER": '206',
            "RPL_TRACESERVICE": '207',
            "RPL_TRACENEWTYPE": '208',
            "RPL_TRACECLASS": '209',
            "RPL_TRACERECONNECT": '210',
            "RPL_TRACELOG": '261',
            "RPL_TRACEEND": '262',
            "RPL_STATSLINKINFO": '211',
            "RPL_STATSCOMMANDS": '212',
            "RPL_ENDOFSTATS": '219',
            "RPL_STATSUPTIME": '242',
            "RPL_STATSOLINE": '243',
            "RPL_UMODEIS": '221',
            "RPL_SERVLIST": '234',
            "RPL_SERVLISTEND": '235',
            "RPL_LUSERCLIENT": '251',
            "RPL_LUSEROP": '252',
            "RPL_LUSERUNKNOWN": '253',
            "RPL_LUSERCHANNELS": '254',
            "RPL_LUSERME": '255',
            "RPL_ADMINME": '256',
            "RPL_ADMINLOC": '257',
            "RPL_ADMINLOC": '258',
            "RPL_ADMINEMAIL": '259',
            "RPL_TRYAGAIN": '263',
            "ERR_NOSUCHNICK": '401',
            "ERR_NOSUCHSERVER": '402',
            "ERR_NOSUCHCHANNEL": '403',
            "ERR_CANNOTSENDTOCHAN": '404',
            "ERR_TOOMANYCHANNELS": '405',
            "ERR_WASNOSUCHNICK": '406',
            "ERR_TOOMANYTARGETS": '407',
            "ERR_NOSUCHSERVICE": '408',
            "ERR_NOORIGIN": '409',
            "ERR_NORECIPIENT": '411',
            "ERR_NOTEXTTOSEND": '412',
            "ERR_NOTOPLEVEL": '413',
            "ERR_WILDTOPLEVEL": '414',
            "ERR_BADMASK": '415',
            "ERR_UNKNOWNCOMMAND": '421',
            "ERR_NOMOTD": '422',
            "ERR_NOADMININFO": '423',
            "ERR_FILEERROR": '424',
            "ERR_NONICKNAMEGIVEN": '431',
            "ERR_ERRONEUSNICKNAME": '432',
            "ERR_NICKNAMEINUSE": '433',
            "ERR_NICKCOLLISION": '436',
            "ERR_UNAVAILRESOURCE": '437',
            "ERR_USERNOTINCHANNEL": '441',
            "ERR_NOTONCHANNEL": '442',
            "ERR_USERONCHANNEL": '443',
            "ERR_NOLOGIN": '******',
            "ERR_SUMMONDISABLED": '445',
            "ERR_USERSDISABLED": '446',
            "ERR_NOTREGISTERED": '451',
            "ERR_NEEDMOREPARAMS": '461',
            "ERR_ALREADYREGISTRED": '462',
            "ERR_NOPERMFORHOST": '463',
            "ERR_PASSWDMISMATCH": '464',
            "ERR_YOUREBANNEDCREEP": '465',
            "ERR_YOUWILLBEBANNED": '466',
            "ERR_KEYSET": '467',
            "ERR_CHANNELISFULL": '471',
            "ERR_UNKNOWNMODE": '472',
            "ERR_INVITEONLYCHAN": '473',
            "ERR_BANNEDFROMCHAN": '474',
            "ERR_BADCHANNELKEY": '475',
            "ERR_BADCHANMASK": '476',
            "ERR_NOCHANMODES": '477',
            "ERR_BANLISTFULL": '478',
            "ERR_NOPRIVILEGES": '481',
            "ERR_CHANOPRIVSNEEDED": '482',
            "ERR_CANTKILLSERVER": '483',
            "ERR_RESTRICTED": '484',
            "ERR_UNIQOPPRIVSNEEDED": '485',
            "ERR_NOOPERHOST": '491',
            "ERR_NOSERVICEHOST": '492',
            "ERR_UMODEUNKNOWNFLAG": '501',
            "ERR_USERSDONTMATCH": '502',
            "NOTICE": 'NOTICE',
        }

        # Inverted IRC code dictionary. Unnecessary? It will be removed soon
        self.num_to_sym = {}
        for k, v in self.sym_to_num.items():
            self.num_to_sym[v] = k
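
        # Illustrative lookups (added example, not from the original source):
        # a raw numeric received from the server maps back to its mnemonic
        # before dispatch in processInput:
        #   self.num_to_sym['001']  ->  'RPL_WELCOME'
        #   self.num_to_sym['433']  ->  'ERR_NICKNAMEINUSE'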
    
    # Connection method: creates the socket, instantiates the message handler and the send/receive threads
    def connect(self):
        # Create the connection socket
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.connect((self.server, self.port))

        # Connection flag, used by the launching main thread to reconnect
        self.running = True

        # FIFO message queue
        self.queue = deque([])
        self.data = {}
        # Data format: {'#channel_name': {'joined': True, 'users': [], '~': [], '&': [], '@': [], '%': [], '+': []}}
        
        self.identified = {}
        # Data format: {'XXCiro': 1, 'pab_mac': 3}

        # Object in charge of handling '!<command> [options]' messages; strict OOP is ignored for the sake of tight encapsulation
        self.handler = FEAHandler(self)

        # Receive thread; lets receiving run in parallel with the other processing
        self.inputThread = threading.Thread(target=self.processInput)
        # Daemon thread: it dies when the original thread dies
        self.inputThread.daemon = True
        # Fire it off
        self.inputThread.start()

        # Send thread; lets sending run in parallel with the other processing
        self.outputThread = threading.Thread(target=self.processOutput)
        # Daemon thread: it dies when the original thread dies
        self.outputThread.daemon = True
        # Fire it off
        self.outputThread.start()

        # Queue the identification messages
        self.queue_append('NICK %s' % self.nick)
        self.queue_append('USER %s %s %s :%s' % (self.nick, self.nick, self.nick, self.nick))
        
        # Block here until the output thread dies; once it does, the external main thread resumes and may enter its reconnection loop
        self.outputThread.join()

    # Disconnection method, only called externally via a system signal (ctrl + c)
    def disconnect(self, signum, frame):
        # Flag to prevent reconnection
        self.dont_reconnect = True
        # The main thread dies, and its daemon children die with it \o/
        sys.exit(0)

    # Input method, only run as the input thread's target
    def processInput(self):
        # While connected
        while self.running:
            # Read a chunk from the socket
            readbuffer = self.socket.recv(4096)
            try:
                # Split the chunk into "human" lines
                lines = str(readbuffer).split('\n')

                # Process each line
                for line in lines:
                    # If the line has a sensible length and is not just a line break or similar
                    if(len(line.strip()) > 0):
                        # DEBUG: print the line to the console
                        # print(line)
                        # Parse the line
                        try:
                            prefix = ''
                            trailing = []
                        
                            if(line[0] == ':'):
                                prefix, line = line[1:].split(' ', 1)
                        
                            if(line.find(' :') != -1):
                                line, trailing = line.split(' :', 1)
                                params = line.split()
                                params.append(trailing)
                            else:
                                params = line.split()
                        
                            command = params.pop(0).upper()
                        
                            if(command in self.num_to_sym):
                                command = self.num_to_sym[command]
                            
                            # Look up the handler method by the message's IRC code
                            method = getattr(self, 'RESPONSE_%s' % command, None)

                            # If an implemented method was found, invoke it in a new thread and move on to the remaining messages;
                            # otherwise, hand it to the unknown-code handler, where it is ignored. Unnecessary?
                            if(method is not None):
                                th = threading.Thread(target=method, args=(prefix, params))
                                th.daemon = True
                                th.start()
                            else:
                                self.RESPONSE_UNKNOWN(prefix, command, params)
                        except:
                            # If an exception occurred while parsing, the line is corrupt.
                            # Possibly because the chunk cut it in half; we ignore it. Real fix?
                            # DEBUG: print("Bad line, ignore it")
                            print(sys.exc_info())
                            pass
            except:
                # Ignore any other unhandled errors
                print(sys.exc_info())
                pass

            # Useful to drop CPU usage from 99% to ~1%
            time.sleep(0.2)
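
        # Worked example (illustrative): the raw line
        #   ':nick!user@host PRIVMSG #chan :hello world'
        # parses to prefix='nick!user@host', command='PRIVMSG',
        # params=['#chan', 'hello world'].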
        
    # Output method, only run as the output thread's target
    def processOutput(self):
        # Minimal flood control: a message is sent at most once every 0.7 seconds
        flood = 0

        # While connected
        while self.running:
            if(len(self.queue) > 0):
                if(flood <= (time.time() - 0.7)):
                    flood = time.time()

                    # Send the message and remove it from the queue
                    out = self.queue.popleft()
                    try:
                        self.socket.send(out)
                    except UnicodeEncodeError:
                        pass
                    
                    # DEBUG: print the line to the console
                    # print(out)

            # If the input thread died, break out of this method, which in turn kills the invoking thread
            if(self.inputThread.is_alive() == False):
                # DEBUG: print('inputThread not alive.\n')
                break

            # Useful to drop CPU usage from 99% to ~1%
            time.sleep(0.2)
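
    # Timing sketch (illustrative): with the 0.7 s window, a burst of five
    # queued messages drains one per window, at roughly t = 0, 0.7, 1.4, 2.1
    # and 2.8 seconds (quantized by the 0.2 s poll), however fast they were queued.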

    # Handler for PING messages, only called from processInput.
    # Must be answered with the matching PONG to avoid being disconnected by the server
    def RESPONSE_PING(self, prefix, params):
        self.queue_append('PONG :%s' % params[-1])

    # Handler for RPL_NAMREPLY messages, only called from processInput.
    # Received after joining a channel: the list of nicks currently in it
    def RESPONSE_RPL_NAMREPLY(self, prefix, params):
        # Store the channel data, specifically the users and their access levels
        users = params[3].strip('\r\n').split(' ')
        channel = params[2].strip('\r\n').lower()

        self.data[channel]['users'] = [x.translate(None, '~&@%+') for x in users]
        
        for level in ['~', '&', '@', '%', '+']:
            self.data[channel][level] = [u[1:] for u in users if(u[0] == level)]

    # Handler for RPL_WELCOME messages, only called from processInput.
    # Received after entering the IRC server; identification data is sent afterwards.
    # Implemented here to guarantee identification at the right time
    def RESPONSE_RPL_WELCOME(self, prefix, params):
        # Send the identification message to the NickServ service
        self.queue_append('PRIVMSG NickServ :IDENTIFY %s\n' % self.password)

        # Queue JOIN messages for every one of the bot's channels
        for channel in self.channels:
            if(channel[1] is None):
                self.queue_append('JOIN %s' % channel[0])
            else:
                self.queue_append('JOIN %s %s' % (channel[0], channel[1]))

    # Handler for JOIN messages, only called from processInput.
    # Received when the bot joins a channel, or when a nick joins a channel the bot is in
    def RESPONSE_JOIN(self, prefix, params):
        # A nick joined a channel, or the bot itself did

        user = prefix[0:prefix.index('!')]
        channel = params[0].strip('\r\n').lower()

        if(user == self.nick):
            # Internal channel flag. Unused?
            self.data[channel] = {'joined': True}
        else:
            # Add the nick to the channel's user list; if it has an access level, the server reports it in a separate MODE message, so nothing else is done here
            self.data[channel]['users'].append(user)



    # Handler for MODE messages, only called from processInput.
    # Received when a mode changes in some channel
    def RESPONSE_MODE(self, prefix, params):
        # {'q':'~', 'a':'&', 'o':'@', 'h':'%', 'v':'+'}
        # A mode change happened for some nick; update that nick's access lists in the corresponding channel
        if(params[0] != self.nick):
            channel = params[0].lower()
            user = params[2]

            for level in ['q', 'a', 'o', 'h', 'v']:
                if(params[1][1] == level):
                    # Map the char to its counterpart in the indices used here, since they differ from the standard (could be changed, but there are many hard-coded references)
                    level = level.translate(string.maketrans('qaohv', '~&@%+'))

                    # If the mode is added, append the user to the list; otherwise remove them from it
                    try:
                        if(params[1][0] == '+'):
                            self.data[channel][level].append(user)
                        else:
                            self.data[channel][level].remove(user)
                        break
                    except:
                        pass

    # Handler for PART messages, only called from processInput.
    # Received when a nick leaves a channel the bot is in
    def RESPONSE_PART(self, prefix, params):
        # A nick left the channel, so all of its references are removed from that channel's data
        user = prefix[0:prefix.index('!')]
        channel = params[0].lower()

        self.data[channel]['users'].remove(user)

        for level in ['~', '&', '@', '%', '+']:
            try:
                self.data[channel][level].remove(user)
            except:
                pass

    # Handler for QUIT messages, only called from processInput.
    # Received when a nick disconnects from the server while sharing a channel with the bot
    def RESPONSE_QUIT(self, prefix, params):
        # A nick disconnected, so all of its references are removed from the channel data
        user = prefix[0:prefix.index('!')]

        for channel in self.data:
            try:
                self.data[channel]['users'].remove(user)

                for level in ['~', '&', '@', '%', '+']:
                    try:
                        self.data[channel][level].remove(user)
                    except:
                        pass
            except:
                pass

    # Handler for NICK messages, only called from processInput.
    # Received when someone sharing a channel with the bot changes nick
    def RESPONSE_NICK(self, prefix, params):
        # A nick change happened; update every reference from the old nick to the new one
        nick_old = prefix[0:prefix.index('!')]
        nick_new = params[0].strip('\r\n')

        for channel in self.data:
            try:
                self.data[channel]['users'].remove(nick_old)
                self.data[channel]['users'].append(nick_new)

                for level in ['~', '&', '@', '%', '+']:
                    try:
                        self.data[channel][level].remove(nick_old)
                        self.data[channel][level].append(nick_new)
                    except:
                        pass
            except:
                pass

    # Handler for KICK messages, only called from processInput.
    # Received when a nick is kicked from a channel the bot is in, or when the bot itself is kicked
    def RESPONSE_KICK(self, prefix, params):
        # If the bot itself was kicked, the channel data is dropped to save memory;
        # otherwise, the kicked user is removed from the channel info and from any access list they belonged to
        if(params[1] == self.nick):
            channel = params[0].strip('\r\n').lower()

            self.data.pop(channel, None)
        else:
            channel = params[0].lower()
            user = params[1]

            self.data[channel]['users'].remove(user)

            for level in ['~', '&', '@', '%', '+']:
                try:
                    self.data[channel][level].remove(user)
                except:
                    pass

    # Handler for PRIVMSG messages, only called from processInput.
    # Received when a normal message arrives in a channel the bot is in, or via query to the bot
    def RESPONSE_PRIVMSG(self, prefix, params):
        # Prefix: nick!user@host
        # Params: ['LeBot', '!sarasa\r']
        # 		  ['#FEA', '!sarasa\r']

        # If the message body starts with an exclamation mark, it is a bot command
        if(params[1][0] == '!'):
            command = params[1].strip('\r\n')[1:]

            if(command.find(' ') > 0):
                command = command[0:command.index(' ')]

            params[1] = params[1][1 + len(command):].lstrip(' ')

            # Assume the message is not private unless proven otherwise
            is_pm = False

            if(params[0] == self.nick):
                is_pm = True

            # Look up a handler for the command inside the Handler() object
            method = getattr(self.handler, 'HANDLE_%s' % command.upper(), None)
        
            # If the method is implemented, call it; if it fails while running, send a message reporting the execution error
            if(method is not None):
                sender = prefix[0:prefix.index('!')]

                try:
                    method(sender, params, is_pm)
                except:
                    # DEBUG: print errors to the console for debugging. Never in production!
                    print(sys.exc_info())

                    # Reply via PM or to the channel, depending on whether the command came in by query or not
                    if(is_pm):
                        self.queue_append('PRIVMSG %s :%s' % (sender, 'Error al ejecutar el comando.'))
                    else:
                        self.queue_append('PRIVMSG %s :%s' % (params[0], 'Error al ejecutar el comando.'))
        else:
            # Normal message, ignored
            pass

    # Handler for ERROR messages, only called from processInput.
    # Arrives when an error occurs, for example reconnecting too quickly
    def RESPONSE_ERROR(self, prefix, params):
        if(params[0].strip('\r') == 'Trying to reconnect too fast.'):
            # DEBUG: print('Waiting...')
            time.sleep(10)

        # Set the flag to False to force a disconnect; the main thread's external loop will then reconnect
        self.running = False

    # Handler for NOTICE messages, only called from processInput.
    # Arrives for communication between the server and the IRC client
    def RESPONSE_NOTICE(self, prefix, params):
        # Prefix: NickServ!service@host
        # Params: ['LeBot', 'STATUS XXCiro|BNC 3\r']
        if(len(params) == 2):
            parts = params[1].strip('\r\n').split(' ')
            if(parts[0] == 'STATUS'):
                self.identified[parts[1]] = parts[2]


    # Handler for UNKNOWN messages, only called from processInput.
    # Reached when no method is implemented to resolve the code of some IRC message
    def RESPONSE_UNKNOWN(self, prefix, command, params):
        pass

    # Encoding hook for outgoing text: intended to produce a UTF-8 bytestring,
    # but it currently returns the text unchanged
    def encode(self, text):
        return text

    # ---------------------------------------------------------------------------------- #
    # @ PUBLIC METHODS                                                                   #
    # ---------------------------------------------------------------------------------- #

    # Append messages to the send queue as raw lines that follow the IRC standard
    def queue_append(self, *messages):
        for msg in messages:
            msg = '%s\r\n' % msg.strip('\r\n')
            self.queue.append(self.encode(msg))
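
    # Usage (illustrative):
    #   self.queue_append('PRIVMSG #FEA :hola')      # one raw line
    #   self.queue_append('JOIN #a', 'JOIN #b')      # several at once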

    # Reload permissions from the corresponding spreadsheet
    def refresh_permission(self):
        self.permissions = self.google.get_permissions(True)

    # Reload orders from the corresponding spreadsheet
    def refresh_orders(self):
        self.orders = self.google.get_orders(True)

    # Upload permissions to the corresponding spreadsheet
    def update_permission(self):
        self.google.set_permissions(self.permissions)

    # Upload channels to the corresponding spreadsheet
    def update_channels(self):
        self.google.set_channels(self.channels)

    # Upload event channels to the corresponding spreadsheet
    def update_channels_events(self):
        self.google.set_channels_events(self.channels_events)

    # Add a channel to the temporary channel list; it is lost unless uploaded to the spreadsheet
    def add_channel(self, channel, password=''):
        self.channels.append((channel, password))

    # Add a channel to the temporary event-channel list; it is lost unless uploaded to the spreadsheet
    def add_channel_event(self, channel):
        self.channels_events.append(channel)

    # Add a nick to the temporary permissions dictionary; the change is lost unless uploaded to the spreadsheet
    def add_permission(self, nick, permission):
        self.permissions[permission].append(nick)

    # Remove a nick from the temporary permissions dictionary; the change is lost unless uploaded to the spreadsheet
    def del_permission(self, nick, permission):
        self.permissions[permission].remove(nick)

    # Remove a channel from the temporary channel list; the change is lost unless uploaded to the spreadsheet
    def del_channel(self, channel):
        for ch in self.channels:
            if ch[0] == channel:
                self.channels.remove(ch)
                break

    # Remove a channel from the temporary event-channel list; the change is lost unless uploaded to the spreadsheet
    def del_channel_event(self, channel):
        for ch in self.channels_events:
            if ch.lower() == channel.lower():
                self.channels_events.remove(ch)
                break

    # Check whether a nick has the specified level or higher in the given channel
    def has_level(self, channel, nick, level):
        perm_to_level = {'~': 5, '&': 4, '@': 3, '%': 2, '+': 1}

        for n in ['~', '&', '@', '%', '+']:
            if nick in self.data[channel.lower()][n]:
                if(perm_to_level[n] >= perm_to_level[level]):
                    return True

        return False
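
    # Example (illustrative): has_level('#FEA', 'XXCiro', '%') is True when
    # XXCiro holds halfop (%) or any higher prefix (~, &, @) in #FEA.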

    # Check whether a nick holds the given permission or higher.
    # NickServ is asked for the nick's STATUS first; the answer arrives
    # asynchronously via RESPONSE_NOTICE, so we poll self.identified until it does
    def has_permission(self, nick, permission):
        self.queue_append('PRIVMSG NickServ :STATUS %s\n' % nick)

        while nick not in list(self.identified.keys()):
            # Useful to drop CPU usage from 99% to ~1%
            time.sleep(0.2)

        if self.identified[nick] != '3':
            del self.identified[nick]
            return False

        del self.identified[nick]

        perm_to_level = {'~': 5, '&': 4, '@': 3, '%': 2, '+': 1}

        for n in self.permissions:
            if nick in self.permissions[n]:
                if(perm_to_level[n] >= perm_to_level[permission]):
                    return True

        return False
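
Several of the comments above reference an external loop in the launching main thread: it calls connect() again whenever the connection drops and stops once dont_reconnect is set, but that loop is not part of this excerpt. A minimal sketch of what it might look like; every connection value below is an illustrative placeholder, not taken from the original source:

import signal
import socket
import time

# Placeholder configuration; the real values come from the deployment
bot = FEABot('irc.example.net', 6667, 'FEABot', 'secret',
             'channel_key', 'permissions_key', 'orders.csv',
             'help.html', 'user@example.com', 'app-password')

# ctrl + c invokes disconnect(signum, frame), which sets dont_reconnect and exits
signal.signal(signal.SIGINT, bot.disconnect)

while not bot.dont_reconnect:
    try:
        # connect() blocks until the output thread dies, i.e. the connection was lost
        bot.connect()
    except socket.error:
        # Server unreachable; fall through and retry
        pass
    if bot.dont_reconnect:
        break
    # Back off briefly before reconnecting
    time.sleep(5)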