Example #1
  def get(self):
    try:
      redirect_uri = self.request.protocol + "://" + self.request.host + "/" + client_info['web']['redirect_uris'][0]
      flow = flow_from_clientsecrets('client_secrets.json', scope=client_info['web']['scope'], redirect_uri=redirect_uri)
      code = self.get_argument("code", None)
      if code is None:
        db.log(email=None, ip=self.request.remote_ip, module="AuthorizerReturned", msg='rejected immersion')
        self.redirect(self.reverse_url('index'))
        return
      studyid = self.get_current_study()
      credentials = flow.step2_exchange(code)
      uri = "https://www.googleapis.com/oauth2/v2/userinfo/?alt=json" # user info uri
      userinfo = self.get_api_response(credentials, uri)
      email = userinfo['email']
      # at this point we have the user's email address

      # store refresh token
      state = db.getState(email, studyid)

      if state is None:
        state = {'email': email, 'userinfo' : userinfo, 'lastuid': 0, 'version': -1} # new user
        if studyid: state['studyid'] = studyid

      # backward compatibility, store userinfo again anyway
      state['userinfo'] = userinfo

      # always store the new credentials
      state['credentials'] = credentials.to_json()
      db.storeState(email, studyid, state)

      # we store a secure cookie to remember that user is logged in
      self.set_secure_cookie("email", email)

      # only add if there is no other task in the queue for the same person
      if not db.hasTask(email, studyid):
        db.pushTask(email, studyid)
        print 'Pushtask with', studyid
        db.log(email=email, ip=self.request.remote_ip, module="AuthorizerReturned", msg='Added a fetching task')

      self.redirect(self.reverse_url('viz'))
    except Exception:
      db.log(email=None, ip=self.request.remote_ip, module="AuthorizerReturned", msg=traceback.format_exc(), level=logging.ERROR)
      self.redirect(self.reverse_url('busy'))
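
Example #1 is the return leg of Google's OAuth 2.0 web-server flow: a Tornado RequestHandler method that exchanges the code query parameter for credentials via oauth2client's step2_exchange, stores them in the application's db layer, and queues a fetching task. As a companion, here is a minimal sketch of the handler that would start that flow; the class name, the /oauth2callback path and the hard-coded scope string are assumptions (the original reads these from client_info), not part of the original code.

# Minimal sketch (assumptions: handler name, callback path, scope string) of the
# authorize step that precedes Example #1's callback handler.
import tornado.web
from oauth2client.client import flow_from_clientsecrets

class AuthorizeHandler(tornado.web.RequestHandler):
  def get(self):
    # must match one of the redirect URIs registered in client_secrets.json
    redirect_uri = self.request.protocol + "://" + self.request.host + "/oauth2callback"
    flow = flow_from_clientsecrets(
        'client_secrets.json',
        scope='https://www.googleapis.com/auth/userinfo.email https://mail.google.com/',
        redirect_uri=redirect_uri)
    # step 1: send the browser to Google's consent page; Google redirects back
    # with ?code=..., which Example #1 passes to flow.step2_exchange()
    self.redirect(flow.step1_get_authorize_url())

In Example #1 the exchanged credentials are serialized with credentials.to_json() and kept in the per-user state only until the fetcher has used them; the fetchers below delete the 'credentials' key from the state once a run completes.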
Example #2
def fetchGmail():
    while True:
        email = None
        task = None
        gc.collect()
        task = db.popTask()
        if task is None:
            time.sleep(3)
            continue
        if task['timestamp'] > datetime.now(pytz.UTC):  # if the task is to be served in the future
            db.pushTaskObject(task)
            time.sleep(3)
            continue
        try:
            email = task['email']
            imap_conn = None
            print 'processing', email  # , 'that was queued at', task['timestamp']
            state = db.getState(email, None)
            print "Using fetcher", state
            if state is None: continue
            lastuid = int(state['lastuid'])
            credentials = oauth2client.client.OAuth2Credentials.from_json(state['credentials'])
            version = int(state['version'])

            # renew access token if expired
            if credentials.access_token_expired:
                credentials.refresh(httplib2.Http())

            authstring = GenerateOAuth2String(email, credentials.access_token, base64_encode=False)
            imap_conn = ADV_IMAP4_SSL('imap.gmail.com')
            imap_conn.authenticate('XOAUTH2', lambda x: authstring)
        except KeyboardInterrupt:
            # add the task again for fetching
            if email: db.pushTask(email, None)
            print 'interrupted'
            return
        except Exception:
            db.log(email=email, ip=None, module="fetcher", msg=traceback.format_exc(), level=logging.ERROR)
            if imap_conn: imap_conn.logout()
            continue
        try:
            all_mail = getAllMailMailbox(imap_conn)
            if all_mail is None:
                imap_conn.logout()
                db.log(email=email, ip=None, module="fetcher", msg="all mail not enabled")
                state['imap'] = True
                db.storeState(email, None, state)
                # add the fetching task again to the queue with a 3-minute delay
                task['timestamp'] = datetime.now(pytz.UTC) + timedelta(minutes=3)
                db.pushTaskObject(task)
                continue
            elif 'imap' in state:
                del state['imap']
                db.storeState(email, None, state)
            # db.markTaskForImap(email)
            # db.markTaskForEmail(email)

            imap_conn.select(all_mail)
            state['working'] = True
            db.storeState(email, None, state)

            append = False
            firstTime = False
            if lastuid > 0:
                emails = db.getEmails(email, version, None)
                append = True
            else:
                emails = []
                firstTime = True
            ok, data = imap_conn.uid('search', None, 'UID', str(lastuid + 1) + ':*')
            uids = [int(d) for d in data[0].split()]
            uids = uids[-LIMIT_NEMAILS:]

            # a UID search of N:* always returns at least the newest message; skip it if it is not newer than lastuid
            if len(uids) == 1 and lastuid >= uids[0]: uids = []

            total = len(uids)
            db.log(email=email, ip=None, module="fetcher", msg=str(total) + " new emails since last login")
            loaded = 0
            start = 0

            fetchtime = 0
            parsingtime = 0
            while loaded < total:
                tmptime = time.time()
                # print str(uids[min(start, len(uids)-1)])+ ":" + str(uids[min(start+JUMP-1, len(uids)-1)])
                ok, data = imap_conn.uid('fetch', str(uids[min(start, len(uids) - 1)]) + ":" + str(
                    uids[min(start + JUMP - 1, len(uids) - 1)]),
                                         '(UID X-GM-LABELS FLAGS X-GM-THRID BODY.PEEK[HEADER.FIELDS (FROM TO CC Date)])')
                fetchtime += (time.time() - tmptime)
                # for each email; the fetch response alternates header tuples with closing ')' items, hence the step of 2
                tmptime = time.time()
                for i in xrange(0, len(data), 2):
                    loaded += 1
                    emails.append(data[i])

                parsingtime += (time.time() - tmptime)
                perc = (loaded * 100.0) / total
                if len(emails) >= REFRESH_NETWORK or loaded >= total:
                    if append:
                        db.storeEmails(email, emails, version, None)
                        append = False
                    else:
                        # store the file
                        db.storeEmails(email, emails, version + 1, None)
                        state['version'] = version + 1
                        version += 1
                    # update state
                    state['lastuid'] = uids[min(start + JUMP - 1, len(uids) - 1)]
                    db.storeState(email, None, state)
                    emails = []
                    db.log(email=email, ip=None, module="fetcher", msg="new version %s stored in the db" % version)
                start += JUMP
            imap_conn.logout()
            if firstTime:
                db.pushNotifyDone(email)
                db.log(email=email, ip=None, module="fetcher", msg="marked for email")
            db.log(email=email, ip=None, module="fetcher",
                   msg="done fetching. Network time: %ds. Parsing time: %ds." % (fetchtime, parsingtime))
            # state = db.getState(email)
            if 'working' in state: del state['working']
            # delete the refresh tokens for security reasons
            if 'credentials' in state: del state['credentials']
            db.storeState(email, None, state)
        except KeyboardInterrupt:
            # add the task again for fetching
            if email: db.pushTask(email, None)
            print 'interrupted'
            return
        except Exception:
            db.log(email=email, ip=None, module="fetcher", msg=traceback.format_exc(), level=logging.ERROR)
            if imap_conn: imap_conn.logout()
            # add the task again for fetching
            if email: db.pushTask(email, None)
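
Example #2 is a worker loop that pops fetch tasks, refreshes the stored oauth2client credentials when they have expired, and logs into Gmail over IMAP with the SASL XOAUTH2 mechanism before paging message headers in chunks of JUMP UIDs. The GenerateOAuth2String helper it calls is not shown; the sketch below follows Google's documented XOAUTH2 string format and is an assumption about what that helper does.

# Sketch of the XOAUTH2 helper used by Example #2 (assumption: it mirrors
# Google's documented 'user=...\1auth=Bearer ...\1\1' SASL format).
import base64

def GenerateOAuth2String(username, access_token, base64_encode=True):
    # initial client response for the XOAUTH2 SASL mechanism
    auth_string = 'user=%s\1auth=Bearer %s\1\1' % (username, access_token)
    if base64_encode:
        auth_string = base64.b64encode(auth_string)
    return auth_string

Example #2 passes base64_encode=False because imaplib's authenticate() base64-encodes the value returned by its callback before sending it to the server.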
Example #3
def fetch():
  while True:
    email = None
    task = None
    gc.collect()
    task = db.popOutlookTask()
    if task is None:
      time.sleep(3)
      continue
    if task['timestamp'] > datetime.now(pytz.UTC): # if the task is to be served in the future
      db.pushOutlookTaskObject(task)
      time.sleep(3)
      continue
    try:
      email = task['email']
      print 'processing', email  # , 'that was queued at', task['timestamp']
      state = db.getState(email, None)
      print "Using fetcher", state
      if state is None: continue
      lastuid = state['lastuid']
      credentials = state['credentials']
      access_token = credentials['access_token']
      version = int(state['version'])

      # # renew access token if expired
      # if credentials.access_token_expired:
      #   credentials.refresh(httplib2.Http())

    except KeyboardInterrupt:
      # add the task again for fetching
      if email: db.pushOutlookTask(email, None)
      print 'interrupted'
      # kill all running outlookfetcher.py processes
      p = subprocess.Popen(['ps', '-A'], stdout=subprocess.PIPE)
      out, err = p.communicate()
      for line in out.splitlines():
        if 'outlookfetcher' in line:
          pid = int(line.split(None, 1)[0])
          os.kill(pid, signal.SIGKILL)
      return 
    except Exception:
      db.log(email=email, ip=None, module="fetcher", msg=traceback.format_exc(), level=logging.ERROR)
      continue
    try:
      me = get_me(access_token)
      if me is None:
        db.log(email=email, ip=None, module="fetcher", msg="all mail not enabled")
        db.storeState(email, None, state)
        # add the fetching task again to the queue with a 3-minute delay
        task['timestamp'] = datetime.now(pytz.UTC) + timedelta(minutes=3)
        db.pushOutlookTaskObject(task)
        continue

      state['working'] = True
      db.storeState(email, None, state)

      append = False
      firstTime = False
      if lastuid > 0:
        emails = db.getEmails(email, version, None)
        append = True
      else:
        emails = []
        firstTime = True
      email_ids = get_my_messages_ids(access_token, 0, email)
      # print email_ids
      uids = [d['id'] for d in email_ids['value']]
      while '@odata.nextLink' in email_ids:
          skip = email_ids['@odata.nextLink'][email_ids['@odata.nextLink'].index('skip=')+5:email_ids['@odata.nextLink'].index('&$order')]
          print 'getting ids new skip', skip
          email_ids = get_my_messages_ids(access_token, skip, email)
          for d in email_ids['value']:
              uids.append(d['id'])
          print "email ids length", len(uids)

          # if '@odata.nextLink' not in email_ids:
          #     skip = int(skip) + len(email_ids['value'])
          #     email_ids = get_my_messages_ids_less(access_token, skip, email)
          #     for d in email_ids['value']:
          #         uids.append(d['id'])
          #     print "email ids length less", len(uids)
      uids = uids[-LIMIT_NEMAILS:]
      # print "lastuid", lastuid
      base_skip = 0
      if lastuid > 0 and lastuid in uids:
        base_skip = uids.index(lastuid)+1
        uids = uids[uids.index(lastuid)+1:]
      
      # uids were already trimmed past lastuid above; nothing to fetch if none remain
      if len(uids) == 0: uids = []
      
      total = len(uids)
      if total > LIMIT_NEMAILS:
        base_skip += (total - LIMIT_NEMAILS)
      db.log(email=email, ip=None, module="fetcher", msg=str(total) + " new emails since last login")
      loaded = 0
      start = 0
      print "total", total
      
      fetchtime = 0
      parsingtime = 0
      skip = base_skip
      email_data0 = None
      first_time = True
      if len(emails) >= REFRESH_NETWORK:
          emails = []
          append = False
      while loaded < total:
        tmptime = time.time()
        if email_data0 is not None and '@odata.nextLink' in email_data0:
            # print email_data0['@odata.nextLink']
            first_time = False
            skip = email_data0['@odata.nextLink'][email_data0['@odata.nextLink'].index('skip=')+5:]
            if skip.find('&') != -1:
                skip = skip[:skip.index('&')]
        # elif email_data0 is not None:
        #     #see if it's really no more emails or just api error
        #     first_time = False
        #     skip = int(skip) + len(email_data)
        #     email_data0 = get_my_messages_less(access_token, email, skip, total, base_skip, first_time)
        #     email_data = email_data0['value']
        #     print "email_data less", len(email_data)
        #
        #     fetchtime += (time.time() - tmptime)
        #     # for each email
        #     tmptime = time.time()
        #     if append == False:
        #         for i in xrange(0, len(email_data)):
        #             loaded += 1
        #             # print email_data[i]
        #             parsed_email = db.formatOutlookEmails(email, email_data[i])
        #             # print parsed_email
        #             if parsed_email != None:
        #                 emails.append(parsed_email)
        #     else:
        #         for i in xrange(0, len(email_data)):
        #             loaded += 1
        #             parsed_email = db.formatOutlookEmails(email, email_data[i])
        #             # print parsed_email
        #             emails.append(parsed_email)
        #     parsingtime += (time.time() - tmptime)

        print "getting messages, skip now is", skip
        email_data0 = get_my_messages(access_token, email, skip, total, base_skip, first_time)
        email_data = email_data0['value']
        print "email_data", len(email_data)
        fetchtime += (time.time() - tmptime)
        # for each email
        tmptime = time.time()
        # parse each fetched message via the db helper
        if not append:
          for item in email_data:
            loaded += 1
            parsed_email = db.formatOutlookEmails(email, item)
            if parsed_email is not None:
              emails.append(parsed_email)
        else:
          for item in email_data:
            loaded += 1
            parsed_email = db.formatOutlookEmails(email, item)
            emails.append(parsed_email)

        parsingtime += (time.time() - tmptime)
        perc = (loaded*100.0)/total
        if len(emails) >= REFRESH_NETWORK or loaded >= total:
          if append:
            db.storeEmails(email, emails, version, None)
            append = False
          else:
            # store the file
            db.storeEmails(email, emails, version + 1, None)
            state['version'] = version + 1
            version+=1
          # update state
          print "emails length", len(emails)
          state['lastuid'] = uids[min(start+JUMP-1, len(uids)-1)]
          db.storeState(email, None, state)
          emails = []
          db.log(email=email, ip=None, module="fetcher", msg="new version %s stored in the db" % version)        
        start+=JUMP
      if firstTime:
        db.log(email=email, ip=None, module="fetcher", msg="marked for email")
      db.log(email=email, ip=None, module="fetcher", msg="done fetching. Network time: %ds. Parsing time: %ds." % (fetchtime, parsingtime))    
      #state = db.getState(email)
      if 'working' in state: del state['working']
      # delete the refresh tokens for security reasons
      if 'credentials' in state: del state['credentials']
      db.storeState(email, None, state)
    except KeyboardInterrupt:
      # add the task again for fetching
      if email: db.pushOutlookTask(email, None)
      print 'interrupted'
      # kill all running outlookfetcher.py processes
      p = subprocess.Popen(['ps', '-A'], stdout=subprocess.PIPE)
      out, err = p.communicate()
      for line in out.splitlines():
        if 'outlookfetcher' in line:
          pid = int(line.split(None, 1)[0])
          os.kill(pid, signal.SIGKILL)
      return
    except Exception:
      db.log(email=email, ip=None, module="fetcher", msg=traceback.format_exc(), level=logging.ERROR)
      # add the task again for fetching
      if email: db.pushOutlookTask(email, None)
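
Example #3 is the Outlook counterpart of the Gmail worker: instead of IMAP UIDs it pages message ids through an OData feed, following '@odata.nextLink' and a '$skip' offset. The get_my_messages_ids helper it relies on is not shown; below is a minimal sketch of one plausible implementation against the Microsoft Graph /me/messages endpoint, with the endpoint, page size and sort order all being assumptions.

# Hypothetical sketch of the paging helper called by Example #3. Endpoint, page
# size and ordering are assumptions; the calling code only relies on the
# 'value' / '@odata.nextLink' response shape.
import requests

GRAPH_MESSAGES_URL = 'https://graph.microsoft.com/v1.0/me/messages'

def get_my_messages_ids(access_token, skip, email):
  # 'email' is kept only to match the call signature used in Example #3
  params = {
    '$select': 'id',
    '$top': 100,                        # page size (assumption)
    '$skip': skip,
    '$orderby': 'receivedDateTime asc'  # sort order (assumption)
  }
  headers = {'Authorization': 'Bearer ' + access_token,
             'Accept': 'application/json'}
  resp = requests.get(GRAPH_MESSAGES_URL, headers=headers, params=params)
  resp.raise_for_status()
  # dict with a 'value' list of {'id': ...} entries and, while more pages
  # remain, an '@odata.nextLink' URL that the caller parses for the next skip
  return resp.json()

Whatever the real helper looks like, the worker only depends on that page shape: it collects the 'id' values, trims the list to LIMIT_NEMAILS, and resumes from the stored lastuid on the next run.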