def init_admin():
    """Create the administrator account if no user has the admin email.

    The email and display name are read from ``settings.ADMIN_EMAIL`` and
    ``settings.ADMIN_NAME``. When a user with that email already exists the
    function returns without changing or logging anything.
    """
    from biostar.apps.users.models import User

    email = settings.ADMIN_EMAIL

    # exists() asks the database a yes/no question instead of loading every
    # matching row just to test the queryset for truthiness.
    if User.objects.filter(email=email).exists():
        return

    admin = User(
        email=email,
        is_staff=True,
        is_admin=True,
        name=settings.ADMIN_NAME,
        type=User.ADMIN,
    )

    # NOTE(review): the initial password is the site's SECRET_KEY. Confirm
    # this is intended and that the password is changed after first login.
    admin.set_password(settings.SECRET_KEY)
    admin.save()

    # The message names the setting, never its value.
    logger.info(
        "added admin user with email=%s, password=SECRET_KEY, name=%s",
        admin.email, admin.get_full_name())
def get_user(uid, allusers):
    """Return the user whose public key is listed for ``uid`` in ``allusers``.

    ``allusers`` is an iterable of rows indexed as ``(uid, name, pubkey)``.
    The first row whose first element equals ``uid`` is used. The user with
    that public key is fetched, or created and saved when none exists yet.

    Raises ``LookupError`` when no row matches ``uid`` (this case used to
    surface as an ``UnboundLocalError``). Any lookup failure other than
    "does not exist" now propagates instead of silently creating a new user.
    """
    row = next((r for r in allusers if r[0] == uid), None)
    if row is None:
        raise LookupError("no user row found for uid %r" % (uid,))
    pubkey = row[2]

    try:
        user = User.objects.get(pubkey=pubkey)
    except User.DoesNotExist:
        # Only a genuinely missing user triggers creation.
        # NOTE(review): assumes User is a Django model exposing DoesNotExist.
        user = User(pubkey=pubkey)
        user.save()
        logger.info('Created user: %s' % user.pubkey)
    else:
        logger.info('Fetched user: %s' % user.pubkey)
    return user
def get_user(uid, allusers):
    """Return the user whose email is listed for ``uid`` in ``allusers``.

    ``allusers`` is an iterable of rows indexed as ``(uid, name, email)``.
    The first row whose first element equals ``uid`` is used. The user with
    that email is fetched, or created with the row's name and saved when
    none exists yet.

    Raises ``LookupError`` when no row matches ``uid`` (this case used to
    surface as an ``UnboundLocalError``). Any lookup failure other than
    "does not exist" now propagates instead of silently creating a new user.
    """
    row = next((r for r in allusers if r[0] == uid), None)
    if row is None:
        raise LookupError("no user row found for uid %r" % (uid,))
    name, email = row[1], row[2]

    try:
        user = User.objects.get(email=email)
    except User.DoesNotExist:
        # Only a genuinely missing user triggers creation.
        # NOTE(review): assumes User is a Django model exposing DoesNotExist.
        user = User(email=email, name=name)
        user.save()
        logger.info('Created user: %s' % user.name)
    else:
        logger.info('Fetched user: %s' % user.name)
    return user
def init_admin():
    """Add the admin user if it is not present.

    Looks up ``settings.ADMIN_EMAIL``; when no user has that email, a staff
    and admin user named ``settings.ADMIN_NAME`` is created, saved and
    logged. When the user already exists nothing is changed or logged.
    """
    from biostar.apps.users.models import User

    email = settings.ADMIN_EMAIL

    # Testing the queryset for truthiness would fetch the matching rows;
    # exists() only asks the database whether there is at least one.
    already_present = User.objects.filter(email=email).exists()
    if not already_present:
        admin = User(
            email=email,
            is_staff=True,
            is_admin=True,
            name=settings.ADMIN_NAME,
            type=User.ADMIN
        )
        # NOTE(review): the site's SECRET_KEY doubles as the initial admin
        # password. Confirm this is intended.
        admin.set_password(settings.SECRET_KEY)
        admin.save()

        # Lazy %-arguments: same message text, formatted by the logger.
        logger.info(
            "added admin user with email=%s, password=SECRET_KEY, name=%s",
            admin.email, admin.get_full_name())
def get_user(uid, allusers):
    """Fetch, or create on first sight, the user listed for ``uid``.

    ``allusers`` is an iterable of rows indexed as ``(uid, name, email)``.
    The first row whose first element equals ``uid`` supplies the name and
    email. An existing user with that email is returned; otherwise a new
    one is created with that email and name, saved, and returned.

    Raises ``LookupError`` when ``uid`` does not appear in ``allusers``
    (this case used to surface as an ``UnboundLocalError``). Lookup errors
    other than "does not exist" propagate rather than creating a new user.
    """
    for row in allusers:
        if row[0] == uid:
            name, email = row[1], row[2]
            break
    else:
        # The loop finished without a match, so there is nothing to look up.
        raise LookupError("no user row found for uid %r" % (uid,))

    try:
        user = User.objects.get(email=email)
    except User.DoesNotExist:
        # NOTE(review): assumes User is a Django model exposing DoesNotExist.
        user = User(email=email, name=name)
        user.save()
        logger.info('Created user: %s' % user.name)
    else:
        logger.info('Fetched user: %s' % user.name)
    return user
def parse_mboxx(filename, limit=None, tag_val=''):
    """Turn the messages of an mbox file into users and threaded posts.

    filename -- path of the mbox file; it is passed through ``fix_file``
                and the resulting file is the one that gets parsed.
    limit    -- optional cap on the number of usable messages (converted
                with ``int``); ``None`` means no cap.
    tag_val  -- tag value handed to ``create_post`` for messages that end
                up without a parent.

    A message becomes a reply when its ``reply_to`` id matches an earlier
    message, when its subject equals "Re: <title>" of an earlier post, or,
    for subjects starting with "Re:", when difflib finds a close match
    among those "Re: <title>" keys. When ``DRY_RUN`` is set no user is
    saved here and the process exits after the summary is logged;
    otherwise every user's score and last login are refreshed from their
    posts.
    """
    from biostar.server.models import disconnect_all
    from biostar.apps.users.models import User
    from biostar.apps.posts.models import Post

    global SKIPPED_REPLY

    # Index the users that already exist by email address.
    known_users = dict((account.email, account) for account in User.objects.all())
    logger.info("*** found %s users" % len(known_users))

    if limit is not None:
        limit = int(limit)

    # Signals are disconnected for the duration of the import.
    disconnect_all()

    logger.info("*** parsing mbox %s" % filename)
    fixed_name = fix_file(filename)

    # Lazily unpack the modified mbox, dropping empty results and messages
    # that lack a sender email or a subject, then apply the optional cap.
    mbox = mailbox.mbox(fixed_name)
    usable = (
        msg for msg in imap(unpack_message, mbox)
        if msg and msg.email and msg.subj
    )
    usable = islice(usable, limit)

    tree, posts_by_id, reply_titles = {}, {}, {}

    for msg in usable:
        day = msg.date.strftime('%Y-%m-%d')
        logger.info("*** %s parsing %s " % (day, msg.subj))

        if msg.email not in known_users:
            logger.info("--- creating user name:%s, email:%s" % (msg.name, msg.email))
            newcomer = User(email=msg.email, name=msg.name)
            if not DRY_RUN:
                newcomer.save()
                newcomer.profile.date_joined = msg.date
                newcomer.profile.last_login = msg.date
                newcomer.profile.save()
            known_users[newcomer.email] = newcomer

        author = known_users[msg.email]

        # Prefer the explicit reply-to id, then the exact "Re: <title>" key.
        parent = posts_by_id.get(msg.reply_to)
        if not parent:
            parent = reply_titles.get(msg.subj)

        # Looks like a reply but still has no parent: try fuzzy matching.
        if not parent and msg.subj.startswith("Re:"):
            wanted = msg.subj
            logger.info("searching for best match %s" % wanted)
            matches = difflib.get_close_matches(wanted, reply_titles.keys())
            if matches:
                logger.info("found %s" % matches)
                parent = reply_titles[matches[0]]

        if parent:
            root = parent.root
            extra = dict(parent=parent)
        else:
            extra = dict(tag_val=tag_val)
        post = create_post(b=msg, author=author, **extra)

        posts_by_id[msg.id] = post

        # Later messages may only be linkable to this post through its title.
        reply_titles["Re: %s" % post.title] = post

    logger.info("*** users %s" % len(known_users))
    logger.info("*** posts %s" % len(posts_by_id))
    logger.info("*** post limit: %s" % limit)
    logger.info("*** skipped posts due to size: %s" % SKIPPED_SIZE)
    logger.info("*** skipped posts due to missing parent: %s" % SKIPPED_REPLY)

    if DRY_RUN:
        logger.info("*** dry run, no data saved")
        sys.exit()

    logger.info("*** updating user scores")
    for user in User.objects.all():
        # The score is simply the number of posts the user authored.
        authored_count = Post.objects.filter(author=user).count()
        user.score = authored_count
        user.full_score = authored_count
        user.save()

        newest = Post.objects.filter(author=user).order_by("-creation_date")[:1]
        if newest:
            user.profile.last_login = newest[0].creation_date
            user.profile.save()
def parse_mboxx(filename, limit=None, tag_val=''):
    """Import the messages of an mbox file as users and threaded posts.

    filename -- path of the mbox file; it is passed through ``fix_file``
                and the resulting file is the one that gets parsed.
    limit    -- optional cap on the number of usable messages (converted
                with ``int``); ``None`` means no cap.
    tag_val  -- tag value handed to ``create_post`` for messages that end
                up without a parent.

    The parent of a message is resolved by trying, in order:
      1. the post created for the message id in ``reply_to``;
      2. a post whose "Re: <title>" key equals the subject;
      3. for subjects starting with "re:" in any letter case, the closest
         difflib match among the "Re: <title>" keys;
      4. an earlier parentless post with the same subject, provided this
         message is dated less than five weeks after it.

    When ``DRY_RUN`` is set no user is saved here and the process exits
    after the summary is logged; otherwise every user's score and last
    login are refreshed from their posts.
    """
    from biostar.server.models import disconnect_all
    from biostar.apps.users.models import User
    from biostar.apps.posts.models import Post

    global SKIPPED_REPLY

    # Index the existing users by email address.
    users = User.objects.all()
    users = dict([(u.email, u) for u in users])

    logger.info("*** found %s users" % len(users))

    if limit is not None:
        limit = int(limit)

    # Disconnect signals
    disconnect_all()

    logger.info("*** parsing mbox %s" % filename)

    new_name = fix_file(filename)

    # Parse the modified mbox.
    mbox = mailbox.mbox(new_name)
    rows = imap(unpack_message, mbox)

    # Remove empty elements
    rows = ifilter(None, rows)

    # Keep only email with sender and subject.
    rows = ifilter(lambda b: b.email, rows)
    rows = ifilter(lambda b: b.subj, rows)

    # Apply limits if necessary.
    rows = islice(rows, limit)

    # posts: message id -> post; fallback: "Re: <title>" -> post
    posts, fallback = {}, {}

    # Parentless posts seen so far, keyed by subject.
    roots = {}

    for b in rows:
        datefmt = b.date.strftime('%Y-%m-%d')
        logger.info("*** %s parsing %s " % (datefmt, b.subj))

        if b.email not in users:
            logger.info("--- creating user name:%s, email:%s" % (b.name, b.email))
            u = User(email=b.email, name=b.name)
            if not DRY_RUN:
                u.save()
                u.profile.date_joined = b.date
                u.profile.last_login = b.date
                u.profile.save()
            users[u.email] = u

        author = users[b.email]

        parent = posts.get(b.reply_to) or fallback.get(b.subj)

        # Looks like a reply but still no parent: fuzzy matching to commence.
        # The subject is lowercased, so the prefix must be lowercase too.
        # Comparing against "Re:" could never match and silently disabled
        # this branch.
        if not parent and b.subj.lower().startswith("re:"):
            curr_key = b.subj
            logger.info("searching for best match %s" % curr_key)
            cands = difflib.get_close_matches(curr_key, fallback.keys())
            if cands:
                logger.info("found %s" % cands)
                parent = fallback[cands[0]]

        # Some emailers do not prepend "Re:" to replies; as a heuristic, a
        # repeated subject within five weeks is treated as a reply.
        if not parent and b.subj in roots:
            cand = roots[b.subj]
            delta = b.date - cand.creation_date
            if delta < timedelta(weeks=5):
                parent = cand

        if parent:
            post = create_post(b=b, author=author, parent=parent)
        else:
            post = create_post(b=b, author=author, tag_val=tag_val)

        posts[b.id] = post

        # Keep track of posts that could be parents.
        if not parent:
            roots[b.subj] = post

        # Fall back to guessing post inheritance from the title.
        fall_key = "Re: %s" % post.title
        fallback[fall_key] = post

    logger.info("*** users %s" % len(users))
    logger.info("*** posts %s" % len(posts))
    logger.info("*** post limit: %s" % limit)
    logger.info("*** skipped posts due to size: %s" % SKIPPED_SIZE)
    logger.info("*** skipped posts due to missing parent: %s" % SKIPPED_REPLY)

    if DRY_RUN:
        logger.info("*** dry run, no data saved")
        sys.exit()

    logger.info("*** updating user scores")
    for user in User.objects.all():
        # The score is the number of posts the user authored.
        score = Post.objects.filter(author=user).count()
        user.score = user.full_score = score
        user.save()

        latest = Post.objects.filter(author=user).order_by("-creation_date")[:1]
        if latest:
            user.profile.last_login = latest[0].creation_date
            user.profile.save()