Example #1
0
def batchDeleteMails(service):
    """Delete every message listed in the module-level ``to_delete_ids``.

    The ids are split into chunks of at most 1000, one ``batchDelete`` call is
    queued per chunk, and all queued calls are sent in one batch HTTP request.
    ``deleteCallback`` receives the outcome of each queued call.

    Args:
        service: Gmail API service object exposing
            ``users().messages().batchDelete``.

    Returns:
        1 when ``to_delete_ids`` is empty or an ``errors.HttpError`` is raised;
        ``None`` once the batch has been executed.
    """
    if not to_delete_ids:
        return 1
    print('Deleting e-mails...')
    try:
        # batch delete messages as 1000 msg limit for regular users
        batch_limit = 1000
        chunks = [to_delete_ids[start:start + batch_limit]
                  for start in range(0, len(to_delete_ids), batch_limit)]
        # Two adjacent f-strings instead of a backslash continuation inside the
        # literal, which used to leak the source indentation into the output.
        print(f'Total Messages: {len(to_delete_ids)}, '
              f'Target Iterations: {len(chunks)}')

        batch = BatchHttpRequest(callback=deleteCallback)

        for chunk in chunks:
            payload = {'ids': [str(d['id']) for d in chunk]}
            batch.add(service.users().messages().batchDelete(userId=USER_ID,
                                                             body=payload))

        batch.execute()
    except errors.HttpError as ex:
        print(f'Exception:{ex}')
        return 1
def batched_messages_get(oauth_result, message_ids, params=None):
    """Fetch several Gmail messages with one batch HTTP request.

    Args:
        oauth_result: object exposing ``email`` and ``credentials`` attributes.
        message_ids: iterable of message ids to request.
        params: optional mapping of extra query parameters added to every
            request URI via ``add_query_parameters``. ``None`` (the default)
            means no extra parameters.

    Returns:
        list with one entry per callback invocation: the response when the
        sub-request succeeded, otherwise the exception object.
    """
    # A ``dict()`` default would be created once at definition time and shared
    # by every call; build a fresh mapping per call instead.
    if params is None:
        params = {}

    email = oauth_result.email
    credentials = oauth_result.credentials
    results_list = []

    # Google's API client throws away the callback's return value inside
    # BatchHttpRequest._callback, during BatchHttpRequest.execute.
    # The solution, in this case, is to define the callback in the scope that
    # gives it access to the data structure we'll be returning the results in.
    def process_batch_responses(request_id, response, exception):
        results_list.append(response if exception is None else exception)

    batch = BatchHttpRequest()
    service = build('gmail', 'v1', http=httplib2.Http())
    users_messages = service.users().messages()

    for each_id in message_ids:
        api_call = users_messages.get(userId=email, id=each_id)
        api_call.uri = add_query_parameters(api_call.uri, params)
        batch.add(api_call, callback=process_batch_responses)

    execute_api_call(credentials, batch)
    return results_list
Example #3
0
def main():
    """
    GMail Dump
    * Get & Store credentials
    * Download all GMail messages into Maildir folder named mail

    Messages whose file already exists under mail/cur are not requested again.
    """
    creds = get_credentials()
    authorized_http = creds.authorize(httplib2.Http())
    service = discovery.build('gmail', 'v1', http=authorized_http)

    label_listing = service.users().labels().list(userId='me').execute()
    labels = label_listing.get('labels', [])

    profile = service.users().getProfile(userId='me').execute()
    # example: {'historyId': '380957', 'emailAddress': 'user@domain', 'threadsTotal': 1135, 'messagesTotal': 1950}
    print("Downloading {messagesTotal} messages for {emailAddress}".format(**profile))

    # Create the three Maildir sub-folders if they are missing.
    for sub_dir in ('tmp', 'new', 'cur'):
        os.makedirs("mail/%s" % sub_dir, mode=0o700, exist_ok=True)

    def process_message(request_id, response, exception):
        # Batch callback: store one raw message under mail/cur/<message id>.
        if exception is not None:
            print("ERROR: " + request_id)
            return
        raw_bytes = base64.urlsafe_b64decode(response['raw'].encode('ASCII'))
        maildir_message = mailbox.MaildirMessage(
            email.message_from_bytes(raw_bytes))
        target_path = "mail/cur/%s" % response['id']
        with open(target_path, "wb") as message_file:
            message_file.write(bytes(maildir_message))

    try:
        message_count = 0
        page_token = None
        while True:
            listing = service.users().messages().list(
                userId='me', pageToken=page_token).execute()
            if 'messages' in listing:
                message_count += len(listing['messages'])
                already_on_disk = 0
                batch = BatchHttpRequest(callback=process_message)
                for entry in listing['messages']:
                    entry_id = entry['id']
                    if os.path.exists('mail/cur/%s' % entry_id):
                        already_on_disk += 1
                        continue
                    batch.add(service.users().messages().get(
                        userId='me', format='raw', id=entry_id))
                batch.execute()
                info = "Downloaded %s messages" % message_count
                if already_on_disk:
                    info += " (skipping %s messages already downloaded)" % already_on_disk
                print(info)

            # Stop once the listing no longer advertises another page.
            if 'nextPageToken' not in listing:
                break
            page_token = listing['nextPageToken']

    except errors.HttpError as error:
        print('An HTTPError occurred: %s' % error)
Example #4
0
    def test_execute_initial_refresh_oauth2(self):
        """Executing with credentials lacking an access token refreshes,
        authorizes and applies them exactly once each."""
        credentials = MockCredentials('Foo')
        # Pretend this is a OAuth2Credentials object
        credentials.access_token = None

        single_response_headers = {
            'status': '200',
            'content-type': 'multipart/mixed; boundary="batch_foobarbaz"'
        }
        http = HttpMockSequence([
            (single_response_headers, BATCH_SINGLE_RESPONSE),
        ])
        credentials.authorize(http)

        recorder = Callbacks()
        batch = BatchHttpRequest()
        batch.add(self.request1, callback=recorder.f)
        batch.execute(http=http)

        # The single request succeeded without an exception.
        self.assertEqual({'foo': 42}, recorder.responses['1'])
        self.assertIsNone(recorder.exceptions['1'])

        # Each credential step happened exactly once.
        self.assertEqual(1, credentials._refreshed)
        self.assertEqual(1, credentials._authorized)
        self.assertEqual(1, credentials._applied)
Example #5
0
  def test_execute_request_body_with_custom_long_request_ids(self):
    """Long custom request ids are folded onto a continuation line in the
    Content-ID header of the serialized batch body."""
    long_ids = ('abc'*20, 'def'*20)

    batch = BatchHttpRequest()
    batch.add(self.request1, request_id=long_ids[0])
    batch.add(self.request2, request_id=long_ids[1])

    echo_headers = {
      'status': '200',
      'content-type': 'multipart/mixed; boundary="batch_foobarbaz"',
    }
    http = HttpMockSequence([(echo_headers, 'echo_request_body')])
    try:
      batch.execute(http=http)
      self.fail('Should raise exception')
    except BatchError as e:
      # The echoed request body is carried on the raised error.
      boundary, _ = e.content.split(None, 1)
      self.assertEqual('--', boundary[:2])
      parts = e.content.split(boundary)
      self.assertEqual(4, len(parts))
      self.assertEqual('', parts[0])
      self.assertEqual('--', parts[3].rstrip())
      for partindex, request_id in enumerate(long_ids, 1):
        part_lines = parts[partindex].splitlines()
        for position, text in enumerate(part_lines):
          if not text.startswith('Content-ID:'):
            continue
          # assert correct header folding
          self.assertTrue(text.endswith('+'), text)
          header_continuation = part_lines[position+1]
          self.assertEqual(
            header_continuation,
            ' %s>' % request_id,
            header_continuation
          )
Example #6
0
def list_email_ids_by_label(service, label):
    """Collect the ids of all messages listed for ``label``.

    Requests ``users().messages().list`` pages of up to 100 results and keeps
    following ``nextPageToken`` until a page arrives without one.

    Args:
        service: Gmail API service object exposing ``users().messages().list``.
        label: value passed as ``labelIds`` to the list call.

    Returns:
        list of message id strings, in the order the pages returned them.
    """
    email_ids = []
    max_results = 100
    next_page_token = None

    while True:
        response = service.users().messages().list(
            userId='me',
            labelIds=label,
            maxResults=max_results,
            pageToken=next_page_token).execute()
        # A page with no results may not carry a 'messages' key at all.
        email_ids.extend(msg['id'] for msg in response.get('messages', []))
        print(len(email_ids))

        if 'nextPageToken' not in response:
            break
        print('next page token:', response['nextPageToken'])
        next_page_token = response['nextPageToken']
        # NOTE: a leftover debug ``break`` used to sit here and stopped the
        # loop after the first page; add one back only for capped test runs.
    return email_ids
  def test_add_fail_for_resumable(self):
    """Adding a request that carries a resumable upload raises BatchError."""
    media = MediaFileUpload(
        datafile('small.png'), chunksize=500, resumable=True)
    self.request1.resumable = media

    batch = BatchHttpRequest()
    with self.assertRaises(BatchError):
      batch.add(self.request1, request_id='1')
  def test_http_errors_passed_to_callback(self):
    """A 401 on one sub-request reaches the callback as an exception while
    the other sub-request still delivers its response."""
    batch_headers = {
      'status': '200',
      'content-type': 'multipart/mixed; boundary="batch_foobarbaz"',
    }
    http = HttpMockSequence([
      (batch_headers, BATCH_RESPONSE_WITH_401),
      (dict(batch_headers), BATCH_RESPONSE_WITH_401),
      ])

    # Give each request its own credentials bound to its own mock transport.
    first_cred = MockCredentials('Foo')
    second_cred = MockCredentials('Bar')
    first_http = HttpMockSequence([])
    first_cred.authorize(first_http)
    second_http = HttpMockSequence([])
    second_cred.authorize(second_http)
    self.request1.http = first_http
    self.request2.http = second_http

    recorder = Callbacks()
    batch = BatchHttpRequest()
    batch.add(self.request1, callback=recorder.f)
    batch.add(self.request2, callback=recorder.f)
    batch.execute(http=http)

    failure = recorder.exceptions['1']
    self.assertEqual(None, recorder.responses['1'])
    self.assertEqual(401, failure.resp.status)
    self.assertEqual('Authorization Required', failure.resp.reason)
    self.assertEqual({u'baz': u'qux'}, recorder.responses['2'])
    self.assertEqual(None, recorder.exceptions['2'])
Example #9
0
    def get_message_list_info(self, message_ids):
        """
        Fetch the message resources for ``message_ids`` in one batch.

        Args:
            message_ids (list): ids of the messages to fetch

        Returns:
            dict mapping each returned message id to its response

        Raises:
            Any non-404 exception handed to the batch callback is re-raised
            from inside that callback.
        """
        messages_info = {}

        def get_message_info(request_id, response, exception):
            # Invoked once per sub-request of the batch.
            if response:
                messages_info[response['id']] = response
            if not exception:
                return
            if exception.resp.status == 404:
                # The message no longer exists; log it and carry on.
                logger.error('404 error: %s' % exception)
            else:
                raise exception

        batch = BatchHttpRequest(callback=get_message_info)
        for message_id in message_ids:
            get_request = self.service.users().messages().get(userId='me',
                                                              id=message_id)
            batch.add(get_request)

        self.execute_service_call(batch)

        return messages_info
Example #10
0
def batch_add_users(service, accountId, users):
    """Queue one ``profileUserLinks().insert`` per request and send them all.

    Parameters:
        service: API service object exposing
            ``management().profileUserLinks().insert``.
        accountId: account id passed to every insert call.
        users: iterable of ``(user_email, requests)`` pairs, where each item
            of ``requests`` is a ``(webPropertyId, profileId, permissions)``
            triple.
    """
    def handle_create_user(requestId, response, exception):
        # Report the outcome of a single insert.
        if exception is None:
            print(f"Request ID: {requestId}, Created user: {response}")
        else:
            print(f"There was an error: {exception}")

    batch = BatchHttpRequest(callback=handle_create_user)

    for user_email, requests in users:
        for webPropertyId, profileId, permissions in requests:
            link_body = {
                "permissions": {"local": permissions},
                "userRef": {"email": user_email},
            }
            insert_call = service.management().profileUserLinks().insert(
                accountId=accountId,
                webPropertyId=webPropertyId,
                profileId=profileId,
                body=link_body,
            )
            batch.add(insert_call)

    batch.execute()
Example #11
0
    def get_label_list_info(self, messages_ids):
        """
        Fetch label information for ``messages_ids`` in one batch.

        Only the ``labelIds``, ``id``, ``threadId`` and ``snippet`` fields are
        requested for each message.

        Args:
            messages_ids (list): ids of the messages to look up

        Returns:
            dict mapping each returned message id to its response

        Raises:
            Any non-404 exception handed to the batch callback is re-raised
            from inside that callback.
        """
        label_info_dict = {}

        def get_label_info(request_id, response, exception):
            # Invoked once per sub-request of the batch.
            if response:
                label_info_dict[response['id']] = response
            # A 404 means the message no longer exists and is ignored;
            # every other error is propagated.
            if exception and exception.resp.status != 404:
                raise exception

        batch = BatchHttpRequest(callback=get_label_info)

        # Temporary add snippet
        # TODO: remove snippet
        wanted_fields = 'labelIds,id,threadId,snippet'
        for message_id in messages_ids:
            get_request = self.service.users().messages().get(
                userId='me',
                id=message_id,
                fields=wanted_fields)
            batch.add(get_request)

        self.execute_service_call(batch)

        return label_info_dict
Example #12
0
    def test_deserialize_response(self):
        """_deserialize_response splits RESPONSE into status fields and body."""
        parsed_resp, parsed_content = BatchHttpRequest()._deserialize_response(
            RESPONSE)

        self.assertEqual(200, parsed_resp.status)
        self.assertEqual('OK', parsed_resp.reason)
        self.assertEqual(11, parsed_resp.version)
        self.assertEqual('{"answer": 42}', parsed_content)
Example #13
0
    def __init__(self, service, **kwargs):
        """Set up the batch and a second, hidden batch built from the same kwargs.

        Args:
            service: API service object; used to look up the batch URI when
                the caller did not pass ``batch_uri``.
            **kwargs: forwarded to both the parent constructor and the hidden
                ``BatchHttpRequest``.
        """
        if "batch_uri" not in kwargs:
            # Borrow the URI from a batch request created by the service.
            service_batch = service.new_batch_http_request()
            kwargs["batch_uri"] = service_batch._batch_uri
        super(EnhancedBatchHttpRequest, self).__init__(**kwargs)
        self._kwargs = kwargs
        self._counter = 0

        # Hidden batch for modtime.
        self._modtime_batch = BatchHttpRequest(**kwargs)
Example #14
0
  def test_add_fail_for_resumable(self):
    """Adding a request with a resumable upload raises a BatchError that can
    be rendered with str()."""
    media = MediaFileUpload(
        datafile('small.png'), chunksize=500, resumable=True)
    self.request1.resumable = media

    batch = BatchHttpRequest()
    with self.assertRaises(BatchError) as raised:
      batch.add(self.request1, request_id='1')
    # Rendering the error must not blow up.
    str(raised.exception)
Example #15
0
def threadsToArchiveCallback(id, response, exception):
    """
    Batch callback: remove the inbox label from every thread in ``response``.

    Modify requests are sent in batches of at most MAX_BATCH_SIZE. When DEBUG
    is set, only the first thread is processed.

    :param id: request id supplied by the batch
    :param response: thread listing; expected to carry 'resultSizeEstimate'
        and 'threads'
    :param exception: error supplied by the batch (only logged here)
    :return: None
    """
    logger.debug(
        "threadsToArchiveCallback:\nid: {}\nresponse: {}\nexception {}".format(
            id, response, exception))

    # No threads found
    if not response:
        return
    if ('resultSizeEstimate' not in response
            or response['resultSizeEstimate'] == 0):
        return
    if 'threads' not in response:
        return

    queued = 0
    archiveBatch = BatchHttpRequest()
    for thread in response['threads']:
        logger.debug("Thread to archive: {}".format(thread))
        body = {
            "removeLabelIds": [INBOX_LABEL_NAME],
            "addLabelIds": [],
        }
        modify_request = service.users().threads().modify(userId='me',
                                                          id=thread['id'],
                                                          body=body)
        archiveBatch.add(modify_request, callback=threadCallback)
        queued += 1

        # Only do 1 run for debug instead of all
        if DEBUG:
            archiveBatch.execute()
            return

        # Flush a full batch and start a fresh one.
        if queued >= MAX_BATCH_SIZE:
            archiveBatch.execute()
            archiveBatch = BatchHttpRequest()
            queued = 0

    # Send whatever is left in the last, partially filled batch.
    if queued > 0:
        archiveBatch.execute()
Example #16
0
def main():
    """Fetch message metadata per label in batches and write one CSV per label.

    Relies on module-level names defined elsewhere (``args``, ``metadata``,
    ``max_per_batch``, ``msg_df``, ``get_message``, ...). The inner ``while``
    loop only ends once the global ``msg_ids`` becomes empty — presumably the
    ``get_message`` callback drains it; verify against that callback.
    """
    global msg_ids

    started_at = time.time()

    # building out the api service
    service = build('gmail', 'v1', credentials=get_creds())

    # label = 'CATEGORY_PERSONAL'
    # labels = list_user_labels(service)
    labels = ['INBOX']
    pprint(labels)
    logging.basicConfig(
        level=args.loglevel,
        format='%(asctime)s %(levelname)s %(funcName)s:%(lineno)d %(message)s',
        stream=sys.stdout)
    # remove_bad_labels(labels)

    for label in labels:
        print('current label', label)
        # gets email ids per label
        msg_ids = list_email_ids_by_label(service, label)

        # run each batch
        while msg_ids:
            # break into batches
            msg_ids_batches = break_into_n_size_each_batch(
                msg_ids, max_per_batch)
            print('batches to process:', len(msg_ids_batches), ',per batch:',
                  len(msg_ids_batches[0]), ',last batch:',
                  len(msg_ids_batches[-1]))
            for b_id, id_chunk in enumerate(msg_ids_batches, 1):
                print('running batch', b_id)

                batch_request = BatchHttpRequest()
                for msg_id in id_chunk:
                    metadata_request = service.users().messages().get(
                        userId='me',
                        id=msg_id,
                        format='metadata',
                        metadataHeaders=metadata)
                    batch_request.add(metadata_request, get_message)
                batch_request.execute()

        print(msg_df.shape)
        print(msg_df.head())
        print('writing to', label + '.csv')
        msg_df.to_csv('new/' + label + '.csv', encoding='utf-8', index=False)

    print('program time:', time.time() - started_at)
Example #17
0
    def fetch(self, msg_ids):
        """Request every listed message in raw format with one batch.

        Args:
            msg_ids: iterable of mappings that each carry an ``'id'`` key.

        Returns:
            The ``messages`` attribute of the ``MessageFetcher`` whose
            ``fetch_message`` served as the batch callback.
        """
        fetcher = MessageFetcher()
        batch = BatchHttpRequest()

        for descriptor in msg_ids:
            raw_request = self.service.users().messages().get(
                userId='me', id=descriptor['id'], format='raw')
            batch.add(raw_request, callback=fetcher.fetch_message)

        batch.execute()
        return fetcher.messages
Example #18
0
def query_email_api(access_token):
    """
    Query the Gmail Batch API for the account's messages and append each
    response to ``emaildata.json``.

    The message ids are listed page by page first; after the user confirms
    with "Y", the messages are fetched in ``minimal`` format in batches.

    param access_token: an oauth access token
    return: None
    """
    output_path = "emaildata.json"
    # Sub-requests per batch; kept small so a large mailbox cannot overflow a
    # single batch request (see the Gmail batching guide for the exact limit).
    batch_size = 100

    # Create the output file, or empty it if it already exists.
    with open(output_path, "w"):
        pass

    def callback(request_id, response, exception):
        """
        generic callback function used to deal with the responses from the Batch API.

        The batch invokes it as callback(request_id, response, exception);
        failed sub-requests are reported and not written to the file.
        """
        if exception is not None:
            print('An error occurred: %s' % exception)
            return
        with open(output_path, 'a') as outfile:
            json.dump(response, outfile, indent=4, sort_keys=True)

    credentials = google.oauth2.credentials.Credentials(access_token)
    GMAIL = build('gmail', 'v1', credentials=credentials)
    message_ids = GMAIL.users().messages().list(userId='me', ).execute()

    # Initialised before the try block so the prompt below still works when a
    # later page fails; the first page counts as a page as well.
    messages = list(message_ids.get('messages', []))
    count = 1
    try:
        while 'nextPageToken' in message_ids:
            page_token = message_ids['nextPageToken']
            message_ids = GMAIL.users().messages().list(
                userId='me', pageToken=page_token).execute()
            messages.extend(message_ids.get('messages', []))
            count += 1
    except errors.HttpError as error:
        print('An error occurred: %s' % error)

    # Number of message ids actually collected.
    message_estimate = len(messages)

    choice = input(
        "%d Pages of Messages have been found on this account. It is estimated that "
        "would be %d Messages.\nWould you like to continue? Type Y to Proceed, otherwise type any key: "
        % (count, message_estimate))
    if choice != "Y":
        return

    message_ids_sorted = [message['id'] for message in messages]
    for start in range(0, len(message_ids_sorted), batch_size):
        batch = BatchHttpRequest()
        for msg_id in message_ids_sorted[start:start + batch_size]:
            batch.add(GMAIL.users().messages().get(userId='me',
                                                   id=msg_id,
                                                   format='minimal'),
                      callback=callback)
        # Execute once per filled batch; executing inside the add loop would
        # send the already-queued requests again on every iteration.
        batch.execute()
    return
Example #19
0
def getMailsByFilter(service, user_id, cleanup_list):
    """Run one message search per query string in a single batch.

    Each query is also used as the request id of its sub-request, and
    ``getCallback`` receives every result.

    Args:
        service: Gmail API service object exposing ``users().messages().list``.
        user_id: value passed as ``userId`` to every list call.
        cleanup_list: iterable of query strings.

    Returns:
        0 when the batch executed, 1 when an ``errors.HttpError`` was raised.
    """
    try:
        batch = BatchHttpRequest(callback=getCallback)
        for lookup in cleanup_list:
            search_request = service.users().messages().list(userId=user_id,
                                                             q=lookup)
            batch.add(search_request, request_id=lookup)
        batch.execute()
    except errors.HttpError as ex:
        print(f'Exception:{ex}')
        return 1
    return 0
Example #20
0
    def test_new_id(self):
        """_new_id counts upwards and skips ids already taken by add()."""
        batch = BatchHttpRequest()

        self.assertEqual('1', batch._new_id())
        self.assertEqual('2', batch._new_id())

        # Reserve '3' explicitly; the next generated id must move past it.
        batch.add(self.request1, request_id='3')

        self.assertEqual('4', batch._new_id())
Example #21
0
 def test_serialize_get_request_no_body(self):
     """A body-less GET request serializes to NO_BODY_EXPECTED_GET."""
     bodyless_get = HttpRequest(
         None,
         None,
         'https://www.googleapis.com/someapi/v1/collection/?foo=bar',
         method='GET',
         body=None,
         headers={'content-type': 'application/json'},
         methodId=None,
         resumable=None)
     serialized = BatchHttpRequest()._serialize_request(bodyless_get)
     self.assertEqual(NO_BODY_EXPECTED_GET.splitlines(),
                      serialized.splitlines())
Example #22
0
def GetEmails(user: str,
              message_ids: list,
              email_format: str = 'full',
              batch_size=100,
              batch_wait=0):
    """
    Get's all of the emails given a list of message id's (typically from a list request).

    :param user: The user to get the emails from.
    :param message_ids: List of message id's to retrieve.
    :param email_format: The format that the email is returned as, default: "full".
        - "full":       Returns the full email message data with body content parsed in the payload field; the raw field
                        is not used. (default)
        - "metadata":   Returns only email message ID, labels, and email headers.
        - "minimal":    Returns only email message ID and labels; does not return the email headers, body, or payload.
        - "raw":        Returns the full email message data with body content in the raw field as a base64url encoded
                        string; the payload field is not used.
    :param batch_size: Size of the batch request (how many emails to download each iteration).
                       Note: Gmail API's rate limit is 250 request units per second, but any batch_size over 100 tends
                       to exceed that.
    :param batch_wait: How long to wait (seconds) after each batch request.

    :return: A tuple ``(messages, batches)``: the list of successfully retrieved emails (dicts) and the list of
             executed ``BatchHttpRequest`` objects, one per batch.
    """
    service = GetService(
        email_address=user,
        scopes=['https://www.googleapis.com/auth/gmail.readonly'])
    print(f'Getting {len(message_ids)} Messages...')
    messages = []
    batches = []

    def collect_message(request_id, response, exception):
        # Use the batch's callback hook rather than its private ``_responses``
        # attribute; sub-requests that failed are left out of the result.
        if exception is None:
            messages.append(response)

    for i in range(0, len(message_ids), batch_size):
        print(f"Messages: {i+1} - {min(i+batch_size, len(message_ids))}")
        # start building a batch request
        batch = BatchHttpRequest(callback=collect_message)
        # add a request for each id
        for message_id in message_ids[i:i + batch_size]:
            batch.add(service.users().messages().get(userId='me',
                                                     id=message_id,
                                                     format=email_format))

        batch.execute()
        batches.append(batch)
        time.sleep(batch_wait)
    print(f"Successfully retrieved {len(messages)}/{len(message_ids)} messages!")
    return messages, batches
  def test_execute_global_callback(self):
    """A callback given to the constructor receives every response."""
    recorder = Callbacks()
    batch = BatchHttpRequest(callback=recorder.f)
    for request in (self.request1, self.request2):
      batch.add(request)

    response_headers = {
      'status': '200',
      'content-type': 'multipart/mixed; boundary="batch_foobarbaz"',
    }
    http = HttpMockSequence([(response_headers, BATCH_RESPONSE)])
    batch.execute(http=http)

    self.assertEqual({'foo': 42}, recorder.responses['1'])
    self.assertEqual({'baz': 'qux'}, recorder.responses['2'])
Example #24
0
  def test_add_fail_for_over_limit(self):
    """Adding one request beyond MAX_BATCH_LIMIT raises BatchError."""
    from googleapiclient.http import MAX_BATCH_LIMIT

    batch = BatchHttpRequest()
    # Fill the batch right up to the limit.
    for _ in range(MAX_BATCH_LIMIT):
      filler = HttpRequest(
        None,
        None,
        'https://www.googleapis.com/someapi/v1/collection/?foo=bar',
        method='POST',
        body='{}',
        headers={'content-type': 'application/json'})
      batch.add(filler)

    with self.assertRaises(BatchError):
      batch.add(self.request1)
Example #25
0
def get_messages_batch(msg_ids):
    """Request the metadata of every message in ``msg_ids`` with one batch.

    Uses the module-level ``service`` and ``metadata`` (the header names to
    return); each result is delivered to the ``get_message`` callback.

    Args:
        msg_ids: iterable of message ids; each id is converted with ``str()``.
    """
    # create the batch request
    batch_requests = BatchHttpRequest()
    print('in get_messages_batch')
    for msg_id in msg_ids:
        print('msg_id', msg_id)
        # The header filter parameter of messages.get is ``metadataHeaders``;
        # the previous ``metadata=`` keyword is not a parameter of that call.
        batch_requests.add(
            service.users().messages().get(userId='me',
                                           id=str(msg_id),
                                           format='metadata',
                                           metadataHeaders=metadata),
            get_message)

    batch_requests.execute()
Example #26
0
def handle_messages_details(message_list: dict, gmail_client: Resource,
                            user_email: str) -> None:
    """Fetch every listed message in one batch and process each result.

    Each response is handed to ``find_and_save_flight_bookings`` together
    with ``service=gmail_client`` and ``user_id=user_email``.

    Args:
        message_list: listing whose ``"messages"`` entry holds mappings with
            an ``"id"`` key; a listing without that entry is a no-op.
        gmail_client: Gmail API client used to build the get requests.
        user_email: value passed as ``userId`` to every get call.
    """
    # A listing without results may lack the "messages" key; nothing to do.
    messages = message_list.get("messages")
    if not messages:
        return

    # The callback is the same for every request, so build it once.
    callback = partial(find_and_save_flight_bookings,
                       service=gmail_client,
                       user_id=user_email)

    batch = BatchHttpRequest()
    for message in messages:
        batch.add(
            gmail_client.users().messages().get(userId=user_email,
                                                id=message["id"]),
            callback=callback,
        )

    batch.execute()
    def messages(self, messages_ids):
        """Fetch the given messages in one batch and parse each response.

        Args:
            messages_ids: iterable of message ids.

        Returns:
            list of ``self.parse_Message(...)`` results, one per callback
            invocation. The callback's error argument is not inspected.
        """
        parsed_messages = []

        def handle_message(index, message, error):
            parsed_messages.append(self.parse_Message(message))

        batch = BatchHttpRequest()
        for message_id in messages_ids:
            get_request = self.service.users().messages().get(userId='me',
                                                              id=message_id)
            batch.add(get_request, callback=handle_message)
        batch.execute()
        return parsed_messages
            def new_batch_http_request(callback=None):
                """Create a BatchHttpRequest bound to the enclosing ``batch_uri``.

                Args:
                  callback: callable, A callback to be called for each
                    response, of the form callback(id, response, exception).
                    The first parameter is the request id, and the second is
                    the deserialized response object. The third is an
                    apiclient.errors.HttpError exception object if an HTTP
                    error occurred while processing the request, or None if
                    no error occurred.

                Returns:
                  A BatchHttpRequest object created with ``callback`` and
                  ``batch_uri``.
                """
                batch_request = BatchHttpRequest(callback=callback,
                                                 batch_uri=batch_uri)
                return batch_request
Example #29
0
def getThreadsForLabels(labelIdMap):
    """
    Queue one inbox thread search per label and execute them as one batch.

    The first group that LABEL_REGEX captures from the label name is used as
    the ``older_than`` day count of the query. Each result is passed to
    ``threadsToArchiveCallback``.

    :param labelIdMap: mapping keyed by label name; only the keys are used
    :return: None
    """
    threadFetchBatch = BatchHttpRequest()
    # Iterate the keys directly: ``dict.iteritems()`` exists only on Python 2,
    # and the mapped ids were never used here.
    for labelName in labelIdMap:
        numDays = re.match(LABEL_REGEX, labelName).group(1)
        query = "label:{} older_than:{}d in:inbox".format(labelName, numDays)
        threadFetchBatch.add(service.users().threads().list(userId='me',
                                                            q=query),
                             callback=threadsToArchiveCallback)

    threadFetchBatch.execute()
def batch_add_events(events, oauth2_clinet_id, oauth2_secrete, sender_id,
                     change, org_credentials, callback):
    """Insert every event into the default calendar with one batch request.

    The connection is obtained from ``google_calendar_raw_connection`` using
    the OAuth2 arguments; its ``'http'`` entry executes the batch and its
    ``'service'`` entry builds the insert calls. ``callback`` is registered
    for every insert.
    """
    connection = google_calendar_raw_connection(oauth2_clinet_id,
                                                oauth2_secrete, sender_id,
                                                change, org_credentials)
    http = connection['http']
    calendar_service = connection['service']

    batch = BatchHttpRequest()
    for event in events:
        insert_call = calendar_service.events().insert(
            calendarId=settings.GOOGLE_CALENDAR_API_DEFAULT_CALENDAR_ID,
            body=event)
        batch.add(insert_call, callback)

    batch.execute(http=http)