def main(): """Main function""" couch = couchdb.Server() topics_db = couch['meneame_topic_db'] news_db = couch['meneame'] logging.info('Loading topic distribution...') logging.info('Retrieving news from DB...') news = {} for post in news_db: new = dict(news_db.get(post)) news[new['_id']] = { 'description': new['description'], 'title': new['title'], 'votes': new['votes'] } logging.info('Merging news and topics...') for topic in topics_db: aux = dict(topics_db.get(topic)) data = news[aux['article_id']] data['topic_id'] = aux['topic_id'] data['slice_id'] = aux['slice_id'] data['slice_date'] = aux['slice_date'] news[aux['article_id']] = data logging.info('Generating JSON files...') json.dump(news, open('web/meneapp/assets/data/topic_news.json', 'w'))
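# A minimal entry-point sketch, not part of the original module: it assumes
# the script is meant to be run directly and that INFO-level log output is
# wanted; only the __main__ guard and logging.basicConfig are added here.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()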
import sys
import json
import email
import mailbox

# The opening of jsonifyMessage() is not shown in the original excerpt; a
# minimal header is assumed here so the try/finally below is valid Python.
# cleanContent() is assumed to be defined elsewhere in the original script.
def jsonifyMessage(msg):
    json_msg = {'parts': []}
    try:
        for part in msg.walk():
            json_part = {}
            # Multipart containers carry no payload of their own
            if part.get_content_maintype() == 'multipart':
                continue
            json_part['contentType'] = part.get_content_type()
            content = part.get_payload(decode=False).decode('utf-8', 'ignore')
            json_part['content'] = cleanContent(content)
            json_msg['parts'].append(json_part)
    except Exception, e:
        sys.stderr.write('Skipping message - error encountered (%s)\n' % (str(e),))
    finally:
        return json_msg

# Note: opening in binary mode is recommended
mbox = mailbox.UnixMailbox(open(MBOX, 'rb'), email.message_from_file)

def gen_json_msgs(m_box):
    while 1:
        msg = m_box.next()
        if msg is None:
            break
        yield jsonifyMessage(msg)

# json cannot serialize a generator directly, so materialize it as a list
if OUT_FILE:
    with open(OUT_FILE, 'wb') as out_file:
        json.dump(list(gen_json_msgs(mbox)), out_file, indent=4)
else:
    print json.dumps(list(gen_json_msgs(mbox)), indent=4)
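# For reference, a minimal Python 3 sketch of the same pipeline (an
# assumption, not part of the original script). mailbox.UnixMailbox was
# removed in Python 3; mailbox.mbox replaces it and is directly iterable,
# so the manual next()/None loop is unnecessary. This assumes jsonifyMessage
# has also been ported to Python 3 syntax.
import json
import mailbox

msgs = [jsonifyMessage(msg) for msg in mailbox.mbox(MBOX)]
with open(OUT_FILE, 'w') as out_file:
    json.dump(msgs, out_file, indent=4)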
# Module-level imports assumed by this method: datetime, glob, hashlib, io,
# json, os, and snappy.
def save(self):
    """
    Saves the data to the file system
    """
    data = {}

    # Timestamps
    now = datetime.datetime.now()
    self.modified_timestamp = now
    if not self.created_timestamp:
        self.created_timestamp = now

    # Create the data directory if it does not exist yet
    if not os.path.isdir(self.datadir_current_path):
        os.makedirs(self.datadir_current_path, NEW_DIR_MODE)

    # Collect the language-independent settings
    for data_key_item in self.all_data_keys:
        data_key_name = data_key_item["name"]
        data_key_type = data_key_item["type"]
        if data_key_type == TYPE_TIMESTAMP:
            timestamp = getattr(self, data_key_name, None)
            if timestamp:
                assert isinstance(timestamp, datetime.datetime)
                data[data_key_name] = timestamp.isoformat()
            else:
                data[data_key_name] = None
        else:
            data[data_key_name] = getattr(self, data_key_name, None)

    # Store the language-dependent *content* in a blob
    for lang in self.keys():
        # Fetch the cached content
        content = self[lang]._content
        if content is None:
            continue

        # If the content is unicode, convert it to UTF-8
        if isinstance(content, unicode):
            content = content.encode("utf-8")

        # Compress the blob before saving, unless the content type is on
        # the list of non-compressible types.
        if self.content_type in constants.CONTENT_TYPES_NOT_COMPRESSIBLE:
            content_data = content
        else:
            content_data = snappy.compress(content)

        # Build the MD5 hash and assemble the blob name from it
        md5hash = hashlib.md5(content_data).hexdigest()
        if self.content_type in constants.CONTENT_TYPES_NOT_COMPRESSIBLE:
            blob_name = md5hash + ".blob"
        else:
            blob_name = md5hash + ".snappy"
        self[lang].content_blob_name = blob_name

        # Create the directory for the blob
        blob_dir = os.path.join(config.DATABLOBSDIR.value, blob_name[0])
        if not os.path.isdir(blob_dir):
            os.makedirs(blob_dir, NEW_DIR_MODE)

        # Save the blob
        blob_path = os.path.join(blob_dir, blob_name)
        if not os.path.isfile(blob_path):
            with io.open(blob_path, "wb") as blob_file:
                blob_file.write(content_data)

        # Clear the temporary content cache
        self[lang]._content = None

    # Collect the language-dependent settings
    for data_key_item in LangData.all_data_keys:
        data_key_name = data_key_item["name"]
        data_key_type = data_key_item["type"]
        for lang in self.keys():
            if data_key_type == TYPE_TIMESTAMP:
                timestamp = getattr(self[lang], data_key_name)
                if timestamp:
                    assert isinstance(timestamp, datetime.datetime)
                    data.setdefault(data_key_name, {})[lang] = timestamp.isoformat()
                else:
                    data.setdefault(data_key_name, {})[lang] = None
            else:
                data.setdefault(data_key_name, {})[lang] = getattr(self[lang], data_key_name)

    # Derive the name of the new JSON file from the current timestamp
    new_json_filename = now.isoformat().replace("-", "").replace(":", "").replace(".", "")[:17] + ".json"
    new_json_path = os.path.join(self.datadir_current_path, new_json_filename)

    # Save the new JSON file
    with io.open(new_json_path, "wb") as new_json_file:
        os.fchmod(new_json_file.fileno(), NEW_FILE_MODE)
        json.dump(data, new_json_file, indent=2)

    # Compress the old JSON files with Snappy and move them into the
    # archive directory.
    for json_path in glob.glob(os.path.join(self.datadir_current_path, "*.json")):
        if json_path == new_json_path:
            continue

        # Determine and create the archive directory
        year_str = os.path.basename(json_path)[:4]
        archivedir_path = os.path.join(self.datadir_archive_path, year_str)
        if not os.path.isdir(archivedir_path):
            os.makedirs(archivedir_path, NEW_DIR_MODE)

        # Snappy-compress the old JSON file into the archive directory
        snappy_filename = os.path.basename(json_path) + ".snappy"
        snappy_path = os.path.join(archivedir_path, snappy_filename)
        with io.open(snappy_path, "wb") as snappy_file:
            os.fchmod(snappy_file.fileno(), NEW_FILE_MODE)
            with io.open(json_path, "rb") as old_json_file:
                snappy_file.write(snappy.compress(old_json_file.read()))

        # Delete the old JSON file
        os.remove(json_path)
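# A minimal companion sketch, not from the original module, showing how a
# blob written by save() could be read back; load_blob is a hypothetical
# helper name. It relies only on details visible above: blobs live under
# config.DATABLOBSDIR.value in a subdirectory named after the blob name's
# first character, and the ".snappy" suffix marks compressed content while
# ".blob" marks content stored uncompressed.
def load_blob(blob_name):
    blob_path = os.path.join(config.DATABLOBSDIR.value, blob_name[0], blob_name)
    with io.open(blob_path, "rb") as blob_file:
        content_data = blob_file.read()
    # Decompress only if save() chose the Snappy path for this content type
    if blob_name.endswith(".snappy"):
        return snappy.decompress(content_data)
    return content_data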