コード例 #1
0
ファイル: gmvault_db.py プロジェクト: csirmaz/gmvault
    def bury_metadata(self, email_info, local_dir=None, extra_labels=()):
        """
            Write the metadata of one email to its .meta file and register
            the email through gmsql.GMSQL.store_email.

            Arguments:
             email_info   : metadata info, indexed with the
                            imap_utils.GIMAPFetcher key constants
             local_dir    : intermediary dir (month dir) below the db dir
             extra_labels : labels appended after the ones of email_info

            Returns the Gmail id read from email_info.
        """
        fetcher = imap_utils.GIMAPFetcher

        # without an intermediary dir the file goes straight into the db dir
        if not local_dir:
            the_dir = self._db_dir
        else:
            the_dir = '%s/%s' % (self._db_dir, local_dir)
            gmvault_utils.makedirs(the_dir)

        gmail_id = email_info[fetcher.GMAIL_ID]
        meta_path = self.METADATA_FNAME % (the_dir, gmail_id)

        def _as_text(raw_label):
            """Return the label as a cleaned unicode string."""
            # imap_lib hands back labels made of digits as numbers
            if isinstance(raw_label, (int, long, float, complex)):
                raw_label = str(raw_label)
            return unicode(gmvault_utils.remove_consecutive_spaces_and_strip(raw_label))

        with open(meta_path, 'w') as meta_desc:
            # subject, message id and sender/recipient come from the header fields
            header_fields = email_info[fetcher.IMAP_HEADER_FIELDS_KEY]
            (subject, u_subject, msgid,
             received, h_from, h_to) = self.parse_header_fields(header_fields)

            labels = [_as_text(item) for item in email_info[fetcher.GMAIL_LABELS]]
            labels.extend(extra_labels) #add extra labels

            flags = email_info[fetcher.IMAP_FLAGS]
            thread_id = email_info[fetcher.GMAIL_THREAD_ID]
            internal_date = email_info[fetcher.IMAP_INTERNALDATE]

            # json structure stored in the .meta file
            meta_obj = {
                self.ID_K: gmail_id,
                self.LABELS_K: labels,
                self.FLAGS_K: flags,
                self.THREAD_IDS_K: thread_id,
                self.INT_DATE_K: gmvault_utils.datetime2e(internal_date),
                self.SUBJECT_K: subject,
                self.MSGID_K: msgid,
                self.XGM_RECV_K: received,
            }

            json.dump(meta_obj, meta_desc)
            meta_desc.flush()

            # the sql store receives the unicode subject and the raw internal date
            gmsql.GMSQL.store_email(gmail_id, thread_id, h_from, h_to,
                                    u_subject, internal_date, labels)

        return gmail_id
コード例 #2
0
ファイル: gmvault_db.py プロジェクト: 10thofhearts/gmvault
    def bury_metadata(self, email_info, local_dir = None, extra_labels = ()):
        """
            Store metadata info in .meta file
            Arguments:
             email_info   : metadata info, indexed with the
                            imap_utils.GIMAPFetcher key constants
             local_dir    : intermediary dir (month dir)
             extra_labels : labels added after the ones found in email_info

            Returns the Gmail id read from email_info.
        """
        if local_dir:
            the_dir = '%s/%s' % (self._db_dir, local_dir)
            gmvault_utils.makedirs(the_dir)
        else:
            the_dir = self._db_dir

        gmail_id  = email_info[imap_utils.GIMAPFetcher.GMAIL_ID]
        meta_path = self.METADATA_FNAME % (the_dir, gmail_id)

        # Build the whole metadata object BEFORE opening the file: opening with 'w'
        # truncates it, so a parsing error must not leave an empty .meta behind.

        # parse header fields to extract subject and msgid
        subject, msgid, received = self.parse_header_fields(email_info[imap_utils.GIMAPFetcher.IMAP_HEADER_FIELDS_KEY])

        # need to convert labels that are number as string
        # come from imap_lib when label is a number
        labels = []
        for label in email_info[imap_utils.GIMAPFetcher.GMAIL_LABELS]:
            if isinstance(label, (int, long, float, complex)):
                label = str(label)

            labels.append(unicode(gmvault_utils.remove_consecutive_spaces_and_strip(label)))

        labels.extend(extra_labels) #add extra labels

        #create json structure for metadata (each key listed exactly once)
        meta_obj = {
                     self.ID_K         : gmail_id,
                     self.LABELS_K     : labels,
                     self.FLAGS_K      : email_info[imap_utils.GIMAPFetcher.IMAP_FLAGS],
                     self.THREAD_IDS_K : email_info[imap_utils.GIMAPFetcher.GMAIL_THREAD_ID],
                     self.INT_DATE_K   : gmvault_utils.datetime2e(email_info[imap_utils.GIMAPFetcher.IMAP_INTERNALDATE]),
                     self.SUBJECT_K    : subject,
                     self.MSGID_K      : msgid,
                     self.XGM_RECV_K   : received
                   }

        # the context manager flushes and closes the file even if json.dump raises
        with open(meta_path, 'w') as meta_desc:
            json.dump(meta_obj, meta_desc)

        return gmail_id
コード例 #3
0
ファイル: imap_utils.py プロジェクト: nip3o/gmvault
 def _get_dir_from_labels(cls, label):
     """
        Return the list of nested dirs described by a label.

        Every '/'-separated component of the label is cleaned with
        gmvault_utils.remove_consecutive_spaces_and_strip and each entry of
        the result is the previous entry extended with the next component.

        label: label name with / in it
     """
     paths = []

     for position, piece in enumerate(label.split('/')):
         piece = gmvault_utils.remove_consecutive_spaces_and_strip(piece)
         # every component but the first is appended to the path built so far
         if position > 0:
             piece = '%s/%s' % (paths[position - 1], piece)
         paths.append(piece)

     return paths
    def _get_dir_from_labels(cls, label):
        """
           Build the cumulative dir paths matching a label.

           The label is split on '/', each component is cleaned with
           gmvault_utils.remove_consecutive_spaces_and_strip, and the returned
           list holds one path per component, each one nested in the previous.

           label: label name with / in it
        """
        cumulated = []

        for component in label.split('/'):
            cleaned = gmvault_utils.remove_consecutive_spaces_and_strip(component)
            if cumulated:
                # nest the component under the last path built
                cumulated.append('%s/%s' % (cumulated[-1], cleaned))
            else:
                cumulated.append(cleaned)

        return cumulated
コード例 #5
0
ファイル: imap_utils.py プロジェクト: nip3o/gmvault
 def _build_labels_str(cls, a_labels):
     """
        Build the parenthesised, space separated label string from a list
        of labels.

        Labels whose lowercase form is in cls.GMAIL_SPECIAL_DIRS_LOWER are
        written as is; any other label is double-quoted, with its own double
        quotes backslash-escaped.
        NOTE(review): the original docstring mentions a utf7 conversion, but
        none happens in this method - confirm where it is done.

        a_labels: List of labels
        Returns None when there is no label.
     """
     # nothing to build without labels
     if not a_labels or len(a_labels) <= 0:
         return None

     pieces = ['(']
     for raw_label in a_labels:
         cleaned = gmvault_utils.remove_consecutive_spaces_and_strip(raw_label)
         if cleaned.lower() not in cls.GMAIL_SPECIAL_DIRS_LOWER:
             escaped = cleaned.replace('"', '\\"') #replace quote with escaped quotes
             pieces.append('\"%s\" ' % (escaped))
         else:
             pieces.append('%s ' % (cleaned))

     joined = ''.join(pieces)
     # drop the trailing space left by the last label before closing
     return '%s%s' % (joined[:-1], ')')
コード例 #6
0
 def _build_labels_str(cls, a_labels):
     """
        Turn a list of labels into a single '(...)' label string.

        A label found (lowercased) in cls.GMAIL_SPECIAL_DIRS_LOWER is emitted
        unquoted; every other label is wrapped in double quotes after its
        embedded double quotes have been escaped with a backslash.
        NOTE(review): the original docstring mentions a utf7 conversion, but
        none happens in this method - confirm where it is done.

        a_labels: List of labels
        Returns None when a_labels is empty or None.
     """
     def _render(one_label):
         """Return the text of one label, followed by a separating space."""
         text = gmvault_utils.remove_consecutive_spaces_and_strip(one_label)
         if text.lower() in cls.GMAIL_SPECIAL_DIRS_LOWER:
             return '%s ' % (text)
         text = text.replace('"', '\\"') #replace quote with escaped quotes
         return '\"%s\" ' % (text)

     result = None
     if a_labels and len(a_labels) > 0:
         opened = '(' + ''.join([_render(item) for item in a_labels])
         # replace the last separating space by the closing parenthesis
         result = '%s%s' % (opened[:-1], ')')

     return result
コード例 #7
0
ファイル: test_utils.py プロジェクト: 10thofhearts/gmvault
def check_remote_mailbox_identical_to_local(the_self, gmvaulter, extra_labels = ()): #pylint: disable=C0103,R0912,R0914,R0915
    """
       Check that the remote mailbox is identical to the local one attached
       to gmvaulter
       Need a connected gmvaulter

       Arguments:
        the_self     : test case used to report the result
                       (assertEqual, assertTrue and fail are called on it)
        gmvaulter    : object giving access to the local db (gstorer)
                       and to the remote mailbox (src)
        extra_labels : labels expected online in addition to the ones on disk
    """
    # get all email data from gmvault-db
    pivot_dir  = None
    gmail_ids  = gmvaulter.gstorer.get_all_existing_gmail_ids(pivot_dir)

    print("gmail_ids = %s\n" % (gmail_ids,))

    #need to check that all labels are there for emails in essential
    gmvaulter.src.select_folder('ALLMAIL')

    # check the number of id on disk
    imap_ids = gmvaulter.src.search({ 'type' : 'imap', 'req' : 'ALL'}) #get everything

    the_self.assertEqual(len(imap_ids), \
                         len(gmail_ids), \
                         "Error. Should have the same number of emails: local nb of emails %d,"\
                         " remote nb of emails %d" % (len(gmail_ids), len(imap_ids)))

    for gm_id in gmail_ids:

        print("Fetching id %s with request %s" % (gm_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA))
        #get disk_metadata
        disk_metadata   = gmvaulter.gstorer.unbury_metadata(gm_id)

        print("disk metadata %s\n" % (disk_metadata,))

        subject  = disk_metadata.get('subject', None)
        msgid    = disk_metadata.get('msg_id', None)
        received = disk_metadata.get('x_gmail_received', None)

        # Collect one search criterion per available field and join them at the end,
        # so that every field is used whichever other ones are present.
        # NOTE: a HEADER DATE criterion was commented out in the original code
        # and is still not part of the request.
        criteria = []

        if subject:
            #split on ' when contained in subject to keep only the first part
            subject = subject.split("'")[0]
            subject = subject.split('"')[0]
            criteria.append('SUBJECT "{subject}"'.format(subject=subject.strip().encode('utf-8')))

        if msgid:
            criteria.append('HEADER MESSAGE-ID {msgid}'.format(msgid=msgid.strip()))

        if received:
            criteria.append('HEADER X-GMAIL-RECEIVED {received}'.format(received=received.strip()))

        req = "(%s)" % (" ".join(criteria))

        print("Req = %s\n" % (req))

        imap_ids = gmvaulter.src.search({ 'type' : 'imap', 'req': req, 'charset': 'utf-8'})

        print("imap_ids = %s\n" % (imap_ids,))

        if len(imap_ids) != 1:
            # covers both no match and several matches
            the_self.fail("expected exactly one imap_id but got %d (%s) for request %s" \
                          % (len(imap_ids), imap_ids, req))

        imap_id = imap_ids[0]

        # get online_metadata
        online_metadata = gmvaulter.src.fetch(imap_id, \
                                              imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

        print("online_metadata = %s\n" % (online_metadata,))
        print("disk_metadata = %s\n"   % (disk_metadata,))

        header_fields = online_metadata[imap_id]['BODY[HEADER.FIELDS (MESSAGE-ID SUBJECT X-GMAIL-RECEIVED)]']

        subject, msgid, received = gmvault_db.GmailStorer.parse_header_fields(header_fields)

        #compare metadata
        the_self.assertEqual(subject, disk_metadata.get('subject', None))
        the_self.assertEqual(msgid,   disk_metadata.get('msg_id', None))
        the_self.assertEqual(received, disk_metadata.get('x_gmail_received', None))

        # check internal date it is plus or minus 1 hour
        online_date   = online_metadata[imap_id].get('INTERNALDATE', None)
        disk_date     = disk_metadata.get('internal_date', None)

        if online_date != disk_date:
            if online_date is None or disk_date is None:
                # report a test failure instead of a TypeError on the date arithmetic
                the_self.fail("online_date (%s) and disk_date (%s) are different" % (online_date, disk_date))

            min_date = disk_date - datetime.timedelta(hours=1)
            max_date = disk_date + datetime.timedelta(hours=1)

            if min_date <= online_date <= max_date:
                print("online_date (%s) and disk_date (%s) differs but "\
                      "within one hour. This is OK (timezone pb) *****" % (online_date, disk_date))
            else:
                the_self.fail("online_date (%s) and disk_date (%s) are different" % (online_date, disk_date))

        #check labels
        # work on a copy so that disk_metadata is left untouched, and treat
        # missing labels as an empty list
        disk_labels   = list(disk_metadata.get('labels', None) or [])
        #add extra labels
        disk_labels.extend(extra_labels)

        # presumably decode_labels returns a list of labels; a None result is
        # handled as "no label" - TODO confirm against imap_utils
        online_labels = imap_utils.decode_labels(online_metadata[imap_id].get('X-GM-LABELS', None)) or []

        #clean potential labels with multiple spaces
        disk_labels   = [ gmvault_utils.remove_consecutive_spaces_and_strip(label) for label in disk_labels ]
        online_labels = [ gmvault_utils.remove_consecutive_spaces_and_strip(label) for label in online_labels ]

        if not disk_labels: #no disk_labels check that there are no online_labels
            the_self.assertTrue(not online_labels)

        print("disk_labels = %s\n" % (disk_labels,))
        print("online_labels = %s\n" % (online_labels,))
        the_self.assertEqual(len(disk_labels), len(online_labels))

        for label in disk_labels:
            #change label Migrated (lower and uppercase) to gmv-migrated because reserved by Gmail
            if label.lower() == "migrated":
                label = "gmv-migrated"
            elif label.lower() == r"\muted":
                label = "gmv-muted"
            if label not in online_labels:
                the_self.fail("label %s should be in online_labels %s as"\
                              " it is in disk_labels %s" % (label, online_labels, disk_labels))

        # check flags (missing flags are handled as an empty list)
        disk_flags   = disk_metadata.get('flags', None) or []
        online_flags = online_metadata[imap_id].get('FLAGS', None) or []

        if not disk_flags: #no disk flags
            the_self.assertTrue(not online_flags)

        the_self.assertEqual(len(disk_flags), len(online_flags))

        for flag in disk_flags:
            if flag not in online_flags:
                the_self.fail("flag %s should be in "\
                              "online_flags %s as it is in disk_flags %s" \
                              % (flag, online_flags, disk_flags))
コード例 #8
0
ファイル: test_utils.py プロジェクト: viruthagiri/gmvault
def check_remote_mailbox_identical_to_local(the_self,
                                            gmvaulter,
                                            extra_labels=()):  #pylint: disable=C0103,R0912,R0914,R0915
    """
       Check that the remote mailbox is identical to the local one attached
       to gmvaulter
       Need a connected gmvaulter

       Arguments:
        the_self     : test case used to report the result
                       (assertEqual, assertTrue and fail are called on it)
        gmvaulter    : object giving access to the local db (gstorer)
                       and to the remote mailbox (src)
        extra_labels : labels expected online in addition to the ones on disk
    """
    # get all email data from gmvault-db
    pivot_dir = None
    gmail_ids = gmvaulter.gstorer.get_all_existing_gmail_ids(pivot_dir)

    print("gmail_ids = %s\n" % (gmail_ids, ))

    #need to check that all labels are there for emails in essential
    gmvaulter.src.select_folder('ALLMAIL')

    # check the number of id on disk
    imap_ids = gmvaulter.src.search({
        'type': 'imap',
        'req': 'ALL'
    })  #get everything

    the_self.assertEqual(len(imap_ids), \
                         len(gmail_ids), \
                         "Error. Should have the same number of emails: local nb of emails %d,"\
                         " remote nb of emails %d" % (len(gmail_ids), len(imap_ids)))

    for gm_id in gmail_ids:

        print("Fetching id %s with request %s" %
              (gm_id, imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA))
        #get disk_metadata
        disk_metadata = gmvaulter.gstorer.unbury_metadata(gm_id)

        print("disk metadata %s\n" % (disk_metadata, ))

        subject = disk_metadata.get('subject', None)
        msgid = disk_metadata.get('msg_id', None)
        received = disk_metadata.get('x_gmail_received', None)

        # Collect one search criterion per available field and join them at
        # the end, so that every field is used whichever other ones are present.
        # NOTE: a HEADER DATE criterion was commented out in the original code
        # and is still not part of the request.
        criteria = []

        if subject:
            #split on ' when contained in subject to keep only the first part
            subject = subject.split("'")[0]
            subject = subject.split('"')[0]
            criteria.append('SUBJECT "{subject}"'.format(
                subject=subject.strip().encode('utf-8')))

        if msgid:
            criteria.append(
                'HEADER MESSAGE-ID {msgid}'.format(msgid=msgid.strip()))

        if received:
            criteria.append('HEADER X-GMAIL-RECEIVED {received}'.format(
                received=received.strip()))

        req = "(%s)" % (" ".join(criteria))

        print("Req = %s\n" % (req))

        imap_ids = gmvaulter.src.search({
            'type': 'imap',
            'req': req,
            'charset': 'utf-8'
        })

        print("imap_ids = %s\n" % (imap_ids, ))

        if len(imap_ids) != 1:
            # covers both no match and several matches
            the_self.fail(
                "expected exactly one imap_id but got %d (%s) for request %s" %
                (len(imap_ids), imap_ids, req))

        imap_id = imap_ids[0]

        # get online_metadata
        online_metadata = gmvaulter.src.fetch(imap_id, \
                                              imap_utils.GIMAPFetcher.GET_ALL_BUT_DATA)

        print("online_metadata = %s\n" % (online_metadata, ))
        print("disk_metadata = %s\n" % (disk_metadata, ))

        header_fields = online_metadata[imap_id][
            'BODY[HEADER.FIELDS (MESSAGE-ID SUBJECT X-GMAIL-RECEIVED)]']

        subject, msgid, received = gmvault_db.GmailStorer.parse_header_fields(
            header_fields)

        #compare metadata
        the_self.assertEqual(subject, disk_metadata.get('subject', None))
        the_self.assertEqual(msgid, disk_metadata.get('msg_id', None))
        the_self.assertEqual(received,
                             disk_metadata.get('x_gmail_received', None))

        # check internal date it is plus or minus 1 hour
        online_date = online_metadata[imap_id].get('INTERNALDATE', None)
        disk_date = disk_metadata.get('internal_date', None)

        if online_date != disk_date:
            if online_date is None or disk_date is None:
                # report a test failure instead of a TypeError on the date arithmetic
                the_self.fail(
                    "online_date (%s) and disk_date (%s) are different" %
                    (online_date, disk_date))

            min_date = disk_date - datetime.timedelta(hours=1)
            max_date = disk_date + datetime.timedelta(hours=1)

            if min_date <= online_date <= max_date:
                print("online_date (%s) and disk_date (%s) differs but "\
                      "within one hour. This is OK (timezone pb) *****" % (online_date, disk_date))
            else:
                the_self.fail(
                    "online_date (%s) and disk_date (%s) are different" %
                    (online_date, disk_date))

        #check labels
        # work on a copy so that disk_metadata is left untouched, and treat
        # missing labels as an empty list
        disk_labels = list(disk_metadata.get('labels', None) or [])
        #add extra labels
        disk_labels.extend(extra_labels)

        # presumably decode_labels returns a list of labels; a None result is
        # handled as "no label" - TODO confirm against imap_utils
        online_labels = imap_utils.decode_labels(online_metadata[imap_id].get(
            'X-GM-LABELS', None)) or []

        #clean potential labels with multiple spaces
        disk_labels = [
            gmvault_utils.remove_consecutive_spaces_and_strip(label)
            for label in disk_labels
        ]
        online_labels = [
            gmvault_utils.remove_consecutive_spaces_and_strip(label)
            for label in online_labels
        ]

        if not disk_labels:  #no disk_labels check that there are no online_labels
            the_self.assertTrue(not online_labels)

        print("disk_labels = %s\n" % (disk_labels, ))
        print("online_labels = %s\n" % (online_labels, ))
        the_self.assertEqual(len(disk_labels), len(online_labels))

        for label in disk_labels:
            #change label Migrated (lower and uppercase) to gmv-migrated because reserved by Gmail
            if label.lower() == "migrated":
                label = "gmv-migrated"
            elif label.lower() == r"\muted":
                label = "gmv-muted"
            if label not in online_labels:
                the_self.fail("label %s should be in online_labels %s as"\
                              " it is in disk_labels %s" % (label, online_labels, disk_labels))

        # check flags (missing flags are handled as an empty list)
        disk_flags = disk_metadata.get('flags', None) or []
        online_flags = online_metadata[imap_id].get('FLAGS', None) or []

        if not disk_flags:  #no disk flags
            the_self.assertTrue(not online_flags)

        the_self.assertEqual(len(disk_flags), len(online_flags))

        for flag in disk_flags:
            if flag not in online_flags:
                the_self.fail("flag %s should be in "\
                              "online_flags %s as it is in disk_flags %s" \
                              % (flag, online_flags, disk_flags))