Example #1
import couchdb
import json
import logging


def main():
    """Merge each article's topic assignment into the news documents and dump the result as JSON"""

    couch = couchdb.Server()
    topics_db = couch['meneame_topic_db']
    news_db = couch['meneame']
    logging.info('Loading topic distribution...')

    logging.info('Retrieving news from DB...')
    # Index every news document by its CouchDB _id, keeping only the
    # fields needed for the merged output.
    news = {}
    for post in news_db:
        new = dict(news_db.get(post))
        news[new['_id']] = {
            'description': new['description'],
            'title': new['title'],
            'votes': new['votes']
        }

    logging.info('Merging news and topics...')
    # Attach each article's topic assignment (topic id plus time-slice
    # metadata) to the corresponding news entry.
    for topic in topics_db:
        aux = dict(topics_db.get(topic))
        data = news[aux['article_id']]

        data['topic_id'] = aux['topic_id']
        data['slice_id'] = aux['slice_id']
        data['slice_date'] = aux['slice_date']

        news[aux['article_id']] = data

    logging.info('Generating JSON files...')
    # Use a context manager so the file handle is flushed and closed.
    with open('web/meneapp/assets/data/topic_news.json', 'w') as json_file:
        json.dump(news, json_file)
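
The snippet does not show an entry point. A minimal driver, assuming the default local CouchDB server and that the info-level log messages above should be visible, might look like this:

if __name__ == '__main__':
    # Hypothetical driver, not part of the original snippet: configure
    # logging so the info messages above are printed, then run the merge.
    logging.basicConfig(level=logging.INFO)
    main()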

Example #2

import email
import json
import mailbox
import sys

# MBOX (input mbox path) and OUT_FILE (output JSON path) are assumed to be
# defined earlier in the original script.

def jsonifyMessage(msg):
    # The snippet begins mid-function; this signature and the json_msg
    # initialization are reconstructed so the loop below can run.
    json_msg = {'parts': []}
    try:
        for part in msg.walk():
            json_part = {}
            if part.get_content_maintype() == 'multipart':
                continue
            json_part['contentType'] = part.get_content_type()
            content = part.get_payload(decode=False).decode('utf-8', 'ignore')
            json_part['content'] = cleanContent(content)

            json_msg['parts'].append(json_part)
    except Exception as e:
        sys.stderr.write('Skipping message - error encountered (%s)\n' % (str(e),))
    finally:
        # Returning from finally yields a (possibly partial) json_msg even
        # when an exception was caught above.
        return json_msg
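
cleanContent is not defined in this snippet. A minimal stand-in, assuming it only strips control characters and collapses whitespace (the original helper may well do more), could be:

import re

def cleanContent(content):
    # Placeholder for the helper used above, an assumption rather than the
    # original implementation: drop non-printable control characters and
    # collapse whitespace runs.
    content = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', content)
    return re.sub(r'\s+', ' ', content).strip()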

# Note: opening in binary mode is recommended

mbox = mailbox.UnixMailbox(open(MBOX, 'rb'), email.message_from_file)
def gen_json_msgs(m_box):
    # UnixMailbox.next() returns None once the mailbox is exhausted.
    while True:
        msg = m_box.next()
        if msg is None:
            break
        yield jsonifyMessage(msg)
        
if OUT_FILE:
    # json.dump cannot serialize a generator directly, so materialize the
    # messages into a list first.
    with open(OUT_FILE, 'w') as out_file:
        json.dump(list(gen_json_msgs(mbox)), out_file, indent=4)
else:
    print json.dumps(list(gen_json_msgs(mbox)), indent=4)
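
Since each record is a plain dict with a 'parts' list, the dump can be sanity-checked by loading it back (assuming OUT_FILE was set):

# Hypothetical check, not part of the original script.
with open(OUT_FILE) as f:
    msgs = json.load(f)
print '%d messages, first one has %d part(s)' % (len(msgs), len(msgs[0]['parts']))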
Example #3
    def save(self):
        """
        Speichert die Daten ins Dateisystem
        """

        data = {}

        # Timestamps: update modified, set created on first save
        now = datetime.datetime.now()
        self.modified_timestamp = now
        if not self.created_timestamp:
            self.created_timestamp = now

        # Create the data directory if it does not exist yet
        if not os.path.isdir(self.datadir_current_path):
            os.makedirs(self.datadir_current_path, NEW_DIR_MODE)

        # Collect the language-independent settings
        for data_key_item in self.all_data_keys:
            data_key_name = data_key_item["name"]
            data_key_type = data_key_item["type"]
            if data_key_type == TYPE_TIMESTAMP:
                timestamp = getattr(self, data_key_name, None)
                if timestamp:
                    assert isinstance(timestamp, datetime.datetime)
                    data[data_key_name] = timestamp.isoformat()
                else:
                    data[data_key_name] = None
            else:
                data[data_key_name] = getattr(self, data_key_name, None)

        # Save the language-dependent *content* into a blob
        for lang in self.keys():

            # Fetch the cached content
            content = self[lang]._content
            if content is None:
                continue

            # If the content is a unicode object, convert it to UTF-8
            if isinstance(content, unicode):
                content = content.encode("utf-8")

            # Compress the blob before saving, unless its content type is
            # on the not-compressible list.
            if self.content_type in constants.CONTENT_TYPES_NOT_COMPRESSIBLE:
                content_data = content
            else:
                content_data = snappy.compress(content)

            # Compute the MD5 hash and assemble the blob name
            md5hash = hashlib.md5(content_data).hexdigest()
            if self.content_type in constants.CONTENT_TYPES_NOT_COMPRESSIBLE:
                blob_name = md5hash + ".blob"
            else:
                blob_name = md5hash + ".snappy"
            self[lang].content_blob_name = blob_name

            # Create the directory for the blob
            blob_dir = os.path.join(config.DATABLOBSDIR.value, blob_name[0])
            if not os.path.isdir(blob_dir):
                os.makedirs(blob_dir, NEW_DIR_MODE)

            # Save the blob
            blob_path = os.path.join(blob_dir, blob_name)
            if not os.path.isfile(blob_path):
                with io.open(blob_path, "wb") as blob_file:
                    blob_file.write(content_data)

            # Clear the temporary content cache
            self[lang]._content = None

        # Collect the language-dependent settings
        for data_key_item in LangData.all_data_keys:
            data_key_name = data_key_item["name"]
            data_key_type = data_key_item["type"]
            for lang in self.keys():
                if data_key_type == TYPE_TIMESTAMP:
                    timestamp = getattr(self[lang], data_key_name)
                    if timestamp:
                        assert isinstance(timestamp, datetime.datetime)
                        data.setdefault(data_key_name, {})[lang] = timestamp.isoformat()
                    else:
                        data.setdefault(data_key_name, {})[lang] = None
                else:
                    data.setdefault(data_key_name, {})[lang] = getattr(self[lang], data_key_name)

        # Determine the name for the new JSON file
        new_json_filename = now.isoformat().replace("-", "").replace(":", "").replace(".", "")[:17] + ".json"
        new_json_path = os.path.join(self.datadir_current_path, new_json_filename)

        # Save the new JSON file
        with io.open(new_json_path, "wb") as new_json_file:
            os.fchmod(new_json_file.fileno(), NEW_FILE_MODE)
            json.dump(data, new_json_file, indent=2)

        # Compress the old JSON files with Snappy and move them into the
        # archive directory.
        for json_path in glob.glob(os.path.join(self.datadir_current_path, "*.json")):
            if json_path == new_json_path:
                continue

            # Determine and create the archive directory
            year_str = os.path.basename(json_path)[:4]
            archivedir_path = os.path.join(self.datadir_archive_path, year_str)
            if not os.path.isdir(archivedir_path):
                os.makedirs(archivedir_path, NEW_DIR_MODE)

            # Compress the old JSON file with Snappy into the archive directory
            snappy_filename = os.path.basename(json_path) + ".snappy"
            snappy_path = os.path.join(archivedir_path, snappy_filename)

            with io.open(snappy_path, "wb") as snappy_file:
                os.fchmod(snappy_file.fileno(), NEW_FILE_MODE)
                with io.open(json_path, "rb") as old_json_file:
                    snappy_file.write(snappy.compress(old_json_file.read()))

            # Delete the old JSON file
            os.remove(json_path)
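
save() never reads data back, but the layout it writes is straightforward to reverse. A minimal read-back sketch under the same assumptions (the timestamp-named *.json snapshots, blob directories keyed by the first character of the blob name, and the python-snappy module); the function names here are hypothetical:

import glob
import io
import json
import os

import snappy


def load_newest_snapshot(datadir_current_path):
    # Hypothetical helper: snapshot names start with an ISO timestamp, so
    # lexicographic order is chronological order.
    json_paths = sorted(glob.glob(os.path.join(datadir_current_path, "*.json")))
    if not json_paths:
        return None
    with io.open(json_paths[-1], "rb") as json_file:
        return json.load(json_file)


def load_blob(datablobs_dir, blob_name):
    # Hypothetical helper: blobs are sharded into subdirectories keyed by
    # the first character of the MD5-based name, as written by save().
    blob_path = os.path.join(datablobs_dir, blob_name[0], blob_name)
    with io.open(blob_path, "rb") as blob_file:
        data = blob_file.read()
    # ".snappy" blobs are compressed; ".blob" blobs are stored verbatim.
    if blob_name.endswith(".snappy"):
        data = snappy.decompress(data)
    return data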