db = Database(logger=log, appname="create-user") cli = argparse.ArgumentParser() cli.add_argument("-u", "--username", required=True, help="Name of user (must be unique)") args = cli.parse_args() if __name__ != "__main__": sys.exit(1) if not re.match(r"[^@]+\@.*?\.[a-zA-Z]+", args.username): print("Please provide an e-mail address as username.") sys.exit(1) try: db.insert("users", data={"name": args.username, "timestamp_token": int(time.time())}) except psycopg2.IntegrityError: print("Error: User %s already exists." % args.username) sys.exit(1) user = User.get_by_name(args.username) if user is None: print("Warning: User not created properly. No password reset e-mail sent.") sys.exit(1) try: user.email_token(new=True) print("An e-mail containing a link through which the registration can be completed has been sent to %s." % args.username) except RuntimeError as e: print(""" WARNING: User registered but no e-mail was sent. The following exception was raised:
threads[chan_thread_id]["timestamp_modified"] = modified_timestamp threads[chan_thread_id]["num_replies"] += 1 if post_data["image_4chan"]: threads[chan_thread_id]["num_images"] += 1 if timestamp > threads[chan_thread_id]["post_last"]: threads[chan_thread_id]["post_last"] = timestamp post_data = {k: str(v).replace("\x00", "") for k, v in post_data.items()} try: new_id = db.insert("posts_4chan", post_data, commit=False, safe=False, return_field="id_seq") except UniqueViolation: print("Duplicate post with id %s in the SQLite dump, skipping." % post_data["id"]) db.rollback() post_data = {} continue # Add to the database! if count > 0 and count % 10000 == 0: print("Committing post %i - %i)" % (count - 10000, count)) db.commit() count += 1
break thread_id = list(post_to_threads[post["msgid"]])[0] postdata = { "id": post["msgid"].replace("\x00", ""), "thread_id": thread_id, "timestamp": post["timestamp"], "subject": post["subject"].replace("\x00", ""), "author": post["from"].replace("\x00", ""), "body": post["message"].replace("\x00", ""), "headers": post["headers"].replace("\x00", ""), "groups": post["groups"] } db.insert("posts_usenet", postdata, commit=False) for group in post["groups"].split(","): if group: db.insert("groups_usenet", { "post_id": post["msgid"], "group": group }, commit=False) if thread_id not in threads: threads[thread_id] = { "timestamp": time.time(), "num_replies": 0, "post_last": "", "post_first": "", "post_last_timestamp": 0 - time.time(),
# Update the running statistics of the thread this post belongs to.
thread_stats = threads[post["thread_num"]]
thread_stats["is_closed"] = post["locked"] == "1"
if post["media_filename"]:
    thread_stats["num_images"] += 1
thread_stats["num_replies"] += 1
thread_stats["post_last"] = post["num"]
thread_stats["timestamp_modified"] = post["timestamp"]

# Every value is converted to a string with NUL characters removed before
# it is handed to the database layer.
post_data = {
    column: str(value).replace("\x00", "")
    for column, value in post_data.items()
}
new_id = db.insert(
    "posts_4chan",
    post_data,
    commit=False,
    safe=False,
    return_field="id_seq",
)

# Any "deleted" value other than "0" is stored as the deletion timestamp,
# keyed on the id_seq returned by the insert above.
# NOTE(review): unlike the insert above this call does not pass
# commit=False - check whether committing here is intended.
if post["deleted"] != "0":
    deletion_row = {"id_seq": new_id, "timestamp_deleted": post["deleted"]}
    db.insert("posts_4chan_deleted", deletion_row)

# Flush the open transaction once every 10000 posts.
if posts > 0 and posts % 10000 == 0:
    print("Committing post %i - %i)" % (posts - 10000, posts))
    db.commit()

# NOTE(review): presumably this last commit belongs after the enclosing
# loop (its header is not visible here) - confirm the indentation.
db.commit()
# Tail of a call that receives (thread_id, args.board) as its last argument;
# the beginning of that statement lies outside this excerpt.
# When thread_exists is falsy, a row for the thread is inserted into
# threads_4chan without committing (commit=False, safe=True). The row is
# built from the opening post `op`: limit_bump is true when op has
# bumplimit == 1 or bumplocked == 1, missing unique_ips / images fall back
# to -1, and num_replies is len(posts).
# For each post, a `dimensions` dict is built only when the post has all
# four of the w, h, tn_w and tn_h keys.
# NOTE(review): this line has lost its line breaks, so whether the
# `for post in posts` loop sits inside the `if not thread_exists` branch
# cannot be determined from here - confirm against the original layout.
(thread_id, args.board)) if not thread_exists: bumplimit = ("bumplimit" in op and op["bumplimit"] == 1) or ("bumplocked" in op and op["bumplocked"] == 1) db.insert( "threads_4chan", { "id": thread_id, "board": args.board, "timestamp": op["time"], "timestamp_scraped": int(time.time()), "timestamp_modified": op["time"], "num_unique_ips": op["unique_ips"] if "unique_ips" in op else -1, "num_images": op["images"] if "images" in op else -1, "num_replies": len(posts), "limit_bump": bumplimit, "limit_image": ("imagelimit" in op and op["imagelimit"] == 1), "is_sticky": ("sticky" in op and op["sticky"] == 1), "is_closed": ("closed" in op and op["closed"] == 1), "post_last": last_post }, commit=False, safe=True) for post in posts: # save dimensions as a dumpable dict - no need to make it indexable if len({"w", "h", "tn_h", "tn_w"} - set(post.keys())) == 0: dimensions = { "w": post["w"],
# Set the identifying and bookkeeping fields of the thread record.
thread["id"] = thread_id
thread["board"] = args.board
thread["timestamp_scraped"] = -1
thread["num_unique_ips"] = -1
thread["num_replies"] = 0
thread["num_images"] = 0
# The flags are only true when the incoming value equals 1.
thread["is_sticky"] = thread["is_sticky"] == 1
thread["is_closed"] = thread["is_closed"] == 1

# The table name is derived from args.datasource and cannot be passed as a
# query parameter; only the thread id is parameterised.
exists = db.fetchone(
    "SELECT * FROM threads_" + args.datasource + " WHERE id = %s",
    (thread_id,),
)

if not exists:
    db.insert("threads_" + args.datasource, thread)
else:
    # When the stored row has the later timestamp, its sticky/closed flags
    # take precedence over the incoming ones.
    if thread["timestamp"] < exists["timestamp"]:
        thread["is_sticky"] = exists["is_sticky"]
        thread["is_closed"] = exists["is_closed"]

    # Merge both records: latest post / modification, earliest creation.
    # Missing or empty values count as 0.
    thread["post_last"] = max(
        int(thread.get("post_last") or 0),
        int(exists.get("post_last") or 0),
    )
    thread["timestamp_modified"] = max(
        int(thread.get("timestamp_modified") or 0),
        int(exists.get("timestamp_modified") or 0),
    )
    # The archive timestamp is merged into its own field. It used to be
    # written to "timestamp_modified", which threw away the value computed
    # just above. The key is only set when one of the two records carries
    # it, so no new column is introduced for tables that lack it.
    if "timestamp_archived" in thread or "timestamp_archived" in exists:
        thread["timestamp_archived"] = max(
            int(thread.get("timestamp_archived") or 0),
            int(exists.get("timestamp_archived") or 0),
        )
    thread["timestamp"] = min(
        int(thread.get("timestamp") or 0),
        int(exists.get("timestamp") or 0),
    )