def handle(self, *args, **options):
    """Import Deb822-formatted files into the database for one tag.

    Positional arguments:
        args[0]  -- the tag; must be purely alphabetic and lowercase
                    (e.g. sid, wheezy, squeeze).
        args[1:] -- names of the files to process, in order.

    Each file is opened with ``self._open`` and split into paragraphs by
    ``Deb822.iter_paragraphs``.  A paragraph containing ``Description-en``
    is passed to ``self._handle_translation_en``; a paragraph containing
    ``Version`` is passed to ``self._handle_packages``.  An exception
    raised by ``_handle_packages`` is reported on stdout and the import
    carries on with the next paragraph.

    Raises:
        CommandError: if no tag was given or the tag is not lowercase
            alphabetic.
    """
    if not args:
        raise CommandError("Require a tag, e.g. sid, wheezy, squeeze, etc...")

    tag = args[0]
    filenames = args[1:]
    if not tag.isalpha() or not tag.islower():
        raise CommandError("First argument must be tag, e.g. sid, wheezy, squeeze, etc...")

    self.session = db.get_db_session()
    # self.session.bind.echo=False

    # State initialised here for the helper methods; not read in this method.
    self.descr_text_map = {}
    self.descr_map = {}
    self.stats = defaultdict(int)

    for filename in filenames:
        self.stderr.write('Processing %s\n' % filename)
        f = self._open(filename)
        try:
            for para in Deb822.iter_paragraphs(f):
                if 'Description-en' in para:
                    self._handle_translation_en(para)
                if 'Version' in para:
                    try:
                        self._handle_packages(tag, para)
                    except Exception as e:
                        # Best effort: one bad paragraph must not abort
                        # the whole import, so report it and move on.
                        self.stdout.write("Problem processing %r\n%s\n" % (para, e))
        finally:
            # Release the handle once the file has been consumed.
            # NOTE(review): assumes self._open returns a file-like object
            # with a close() method -- confirm against _open.
            f.close()

    self.session.commit()
def handle(self, *args, **options):
    """Write the translations for one language and tag to a file.

    Positional arguments (exactly three are required):
        args[0] -- language of the translations to export.
        args[1] -- tag the descriptions must carry.
        args[2] -- name of the output file; it is opened for writing,
                   so any existing content is replaced.

    Only descriptions whose tag entry has a ``date_end`` within the last
    seven days (or later) are exported, ordered by package name.  Each
    record is written as a Deb822 paragraph with ``Package``,
    ``Description-md5`` and ``Description-<lang>`` fields, encoded as
    UTF-8.  If nothing matched, a warning is written to stderr.

    Raises:
        CommandError: if the number of arguments is not three.
    """
    if len(args) != 3:
        raise CommandError(
            "Requires a language, a tag and an output filename")

    lang, tag, filename = args

    output = False
    # The context manager guarantees the output is flushed and closed,
    # even if the query fails part-way through.
    with open(filename, "w") as f:
        session = db.get_db_session()
        session.bind.echo = False

        query = (
            session.query(ddtp.Translation, ddtp.Description, ddtp.DescriptionTag)
            .filter(ddtp.Translation.description_id == ddtp.Description.description_id)
            .filter(ddtp.Description.description_id == ddtp.DescriptionTag.description_id)
            .filter(ddtp.Translation.language == lang)
            .filter(ddtp.DescriptionTag.date_end >= date.today() - timedelta(days=7))
            .filter(ddtp.DescriptionTag.tag == tag)
            .order_by(ddtp.Description.package)
            .yield_per(100)
        )

        for trans, descr, _descr_tag in query:
            trans_para = Deb822()
            trans_para['Package'] = descr.package
            trans_para['Description-md5'] = descr.description_md5
            trans_para['Description-%s' % lang] = trans.translation

            # Minor magic here: the translation has an extra newline here,
            # which we use to separate the paragraphs
            f.write(trans_para.dump().encode('utf-8'))
            output = True

    if not output:
        self.stderr.write("WARNING: No output for tag %r, language %r\n" % (tag, lang))
def handle(self, *args, **options):
    """Write the translations for one language and tag to a file.

    Positional arguments (exactly three are required):
        args[0] -- language of the translations to export.
        args[1] -- tag the descriptions must carry.
        args[2] -- name of the output file; it is opened for writing,
                   so any existing content is replaced.

    Only descriptions whose tag entry has a ``date_end`` within the last
    seven days (or later) are exported, ordered by package name.  Each
    record is written as a Deb822 paragraph with ``Package``,
    ``Description-md5`` and ``Description-<lang>`` fields, encoded as
    UTF-8.  If nothing matched, a warning is written to stderr.

    Raises:
        CommandError: if the number of arguments is not three.
    """
    if len(args) != 3:
        raise CommandError("Requires a language, a tag and an output filename")

    lang, tag, filename = args

    output = False
    # The context manager guarantees the output is flushed and closed,
    # even if the query fails part-way through.
    with open(filename, "w") as f:
        session = db.get_db_session()
        session.bind.echo = False

        query = (
            session.query(ddtp.Translation, ddtp.Description, ddtp.DescriptionTag)
            .filter(ddtp.Translation.description_id == ddtp.Description.description_id)
            .filter(ddtp.Description.description_id == ddtp.DescriptionTag.description_id)
            .filter(ddtp.Translation.language == lang)
            .filter(ddtp.DescriptionTag.date_end >= date.today() - timedelta(days=7))
            .filter(ddtp.DescriptionTag.tag == tag)
            .order_by(ddtp.Description.package)
            .yield_per(100)
        )

        for trans, descr, _descr_tag in query:
            trans_para = Deb822()
            trans_para['Package'] = descr.package
            trans_para['Description-md5'] = descr.description_md5
            trans_para['Description-%s' % lang] = trans.translation

            # Minor magic here: the translation has an extra newline here,
            # which we use to separate the paragraphs
            f.write(trans_para.dump().encode('utf-8'))
            output = True

    if not output:
        self.stderr.write("WARNING: No output for tag %r, language %r\n" % (tag, lang))
def handle(self, *args, **options):
    """Load a tab-separated wordlist file for one language.

    Positional arguments (exactly two are required):
        args[0] -- key of the language, looked up in ``ddtss.Languages``.
        args[1] -- name of the wordlist file.

    Every non-blank line of the file must have the form
    ``word<TAB>translation``.  Words are lower-cased; a word that already
    appeared earlier in the file is skipped with a message on stdout.
    One ``ddtss.WordlistEntry`` is added to the session per remaining
    word, and the session is committed once at the end.

    Raises:
        CommandError: on a wrong argument count, an unknown language, or
            a non-blank line that contains no tab separator.
    """
    if len(args) != 2:
        raise CommandError("Require a language and a filename.")

    language, filename = args

    self.session = db.get_db_session()
    self.session.bind.echo = False

    self.lang = self.session.query(ddtss.Languages).get(language)
    if not self.lang:
        raise CommandError("Unknown language '%s'" % language)

    words = set()
    # The context manager closes the wordlist even when a line is rejected.
    with open(filename) as wordlist:
        for lineno, line in enumerate(wordlist, 1):
            line = line.strip()
            if not line:
                continue

            # strip() also removes a trailing tab, so a word with an empty
            # translation ends up here as a line without a separator.
            if '\t' not in line:
                raise CommandError(
                    "Line %d of '%s' is not in 'word<TAB>translation' format"
                    % (lineno, filename))

            word, text = line.split('\t', 1)
            word = word.lower()
            if word in words:
                print("Skipping duplicate '%s'" % word)
                continue
            words.add(word)

            newword = ddtss.WordlistEntry(language=self.lang, word=word, translation=text)
            self.session.add(newword)

    self.session.commit()
def handle(self, *args, **options):
    """Load a tab-separated wordlist file for one language.

    Positional arguments (exactly two are required):
        args[0] -- key of the language, looked up in ``ddtss.Languages``.
        args[1] -- name of the wordlist file.

    Every non-blank line of the file must have the form
    ``word<TAB>translation``.  Words are lower-cased; a word that already
    appeared earlier in the file is skipped with a message on stdout.
    One ``ddtss.WordlistEntry`` is added to the session per remaining
    word, and the session is committed once at the end.

    Raises:
        CommandError: on a wrong argument count, an unknown language, or
            a non-blank line that contains no tab separator.
    """
    if len(args) != 2:
        raise CommandError("Require a language and a filename.")

    language, filename = args

    self.session = db.get_db_session()
    self.session.bind.echo = False

    self.lang = self.session.query(ddtss.Languages).get(language)
    if not self.lang:
        raise CommandError("Unknown language '%s'" % language)

    words = set()
    # The context manager closes the wordlist even when a line is rejected.
    with open(filename) as wordlist:
        for lineno, line in enumerate(wordlist, 1):
            line = line.strip()
            if not line:
                continue

            # strip() also removes a trailing tab, so a word with an empty
            # translation ends up here as a line without a separator.
            if '\t' not in line:
                raise CommandError(
                    "Line %d of '%s' is not in 'word<TAB>translation' format"
                    % (lineno, filename))

            word, text = line.split('\t', 1)
            word = word.lower()
            if word in words:
                print("Skipping duplicate '%s'" % word)
                continue
            words.add(word)

            newword = ddtss.WordlistEntry(language=self.lang, word=word, translation=text)
            self.session.add(newword)

    self.session.commit()
def handle_noargs(self, **options): global users, languages, packages, package_reviews session = db.get_db_session() users = defaultdict(ddtss.Users) packages = defaultdict(ddtss.PendingTranslation) res = session.query(ddtss.Languages).all() languages = defaultdict( lambda: ddtss.Languages(translation_model=DefaultTranslationModel()), ((r.language, r) for r in res) ) for key, val in session.query(DDTSS.key, DDTSS.value).yield_per(100): for key_regex, val_regex, func in keymap: key_match = re.match(key_regex, key) if key_match: val_match = re.match(val_regex, val) if not val_match: print "Key %r value doesn't match regex" % key break func(key_match.groups(), val_match.groups()) break else: print "Unknown key: %r" % key print "%d users total" % len(users) now = time.time() save_users = [] for username, user in users.iteritems(): if not user.key: continue if user.lastseen < now-(180*86400): continue if user.lastlanguage_ref and user.lastlanguage_ref != 'xx': user.lastlanguage = languages[user.lastlanguage_ref] else: user.lastlanguage_ref = None user.active = True user.username = username save_users.append(user) print "%d users saved" % len(save_users) print "%d packages total" % len(packages) save_package = [] messages = [] for package_key, package in packages.iteritems(): try: if not hasattr(package,'data'): # Incomplete record continue if package.state is None: continue if not hasattr(package, 'long'): # Not submitted even once print "%r skipped due to not used" % (package_key,) continue # The old system didn't know the description ID, so we look it # up by the MD5 sum of the description in the data field m = re.search(r'(?m)^Description: (.*)\n((?: .*\n)+)', package.data) if not m: print "Couldn't extract description from %r" % package_key continue short, long = m.groups() md5 = hashlib.md5(short+"\n"+long).hexdigest() descr, = session.query(ddtp.Description.description_id).filter_by(description_md5=md5).one() package.description_id = descr # Handle renamed fields 
if hasattr(package, 'iter'): package.iteration = package.iter else: package.iteration = 0 if hasattr(package, 'owner'): package.owner_username = package.owner if hasattr(package, 'timestamp'): package.lastupdate = package.timestamp else: package.lastupdate = now if hasattr(package,'agefield'): package.firstupdate = package.agefield else: package.firstupdate = now package.language = languages[package_key[0]] if package.comment: if package.language: message = ddtss.Messages( message=package.comment, actionstring="old comments", to_user=None, language=package_key[0], for_description=package.description_id, from_user=None, in_reply_to=None, timestamp=int(time.time())) messages.append(message) package.comment=""; save_package.append(package_key) except Exception, e: print "Package %r: %s" % (package_key, e)