예제 #1
0
    def validate_query(query, request, user):
        """
        Check and normalise the parameters of a Douban dataset query.

        Rejects inverted date ranges and queries without any usable group,
        reduces the groups to at most 25 bare IDs and clamps the requested
        number of topics to the range 1-200.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the date range ends before it
        starts, or if no valid group was provided
        """
        # a range that ends before it starts can never match anything
        min_date, max_date = query.get("daterange")
        if min_date and max_date and max_date < min_date:
            raise QueryParametersException("Date range must start before it ends")

        # groups may be given as bare IDs or as URLs containing "/group/<id>/";
        # either way only the ID itself is kept
        group_ids = []
        for raw_group in query["groups"].replace("\n", ",").split(","):
            group_id = raw_group.split("/group/")[-1].split("/")[0].strip()
            if group_id:
                group_ids.append(group_id)

        # never query more than 25 groups at once
        group_ids = group_ids[:25]
        if not group_ids:
            raise QueryParametersException("No valid groups were provided.")

        # more than 200 topics makes Douban respond with 429s, so clamp the
        # requested amount to 1-200 (falling back to 10 via convert_to_int)
        amount = convert_to_int(query["amount"], 10)
        amount = max(min(amount, 200), 1)

        return {
            "min_date": min_date,
            "max_date": max_date,
            "groups": ",".join(group_ids),
            "amount": amount,
            # whether HTML should be stripped from posts
            "strip": bool(query.get("strip", False))
        }
예제 #2
0
    def validate_query(query, request, user):
        """
        Check and normalise the parameters of a BitChute query.

        The search query is mandatory and may contain at most 15 items,
        separated by commas and/or newlines.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no query was given, or it
        contains more than 15 items
        """
        raw_query = query.get("query", "")
        if not raw_query.strip():
            raise QueryParametersException("You must provide a search query.")

        # newlines and commas both separate items; use commas throughout
        search_items = raw_query.replace("\n", ",")

        # n separators means n + 1 items
        if search_items.count(",") >= 15:
            raise QueryParametersException(
                "You cannot query more than 15 items at a time.")

        # the remaining parameters are passed through under their new names
        safe_query = {}
        safe_query["items"] = query.get("max_posts")
        safe_query["query"] = search_items
        safe_query["scope"] = query.get("search_scope")
        safe_query["item_type"] = query.get("search_type")
        return safe_query
예제 #3
0
    def validate_query(query, request, user):
        """
        Validate custom data input

        Confirms that a file was uploaded as ``data_upload``, that it can be
        read as a UTF-8 encoded, comma-separated CSV file, and that it
        contains all columns required for the chosen platform (as listed in
        ``ImportFromExternalTool.required_columns``). If so, returns some
        metadata about the upload.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no file was uploaded, the
        platform is unknown, the file is empty or not a well-formed CSV file,
        or required columns are missing
        """

        # do we have an uploaded file?
        if "data_upload" not in request.files:
            raise QueryParametersException("No file was offered for upload.")

        platform = query.get("platform", "")
        if platform not in ImportFromExternalTool.required_columns:
            raise QueryParametersException("Invalid platform")

        file = request.files["data_upload"]
        if not file:
            raise QueryParametersException("No file was offered for upload.")

        wrapped_upload = io.TextIOWrapper(file, encoding="utf-8")

        # validate file as csv
        reader = csv.DictReader(wrapped_upload, delimiter=",")

        # reading the header row is what actually touches the file, so this is
        # where encoding and parsing problems surface
        try:
            fields = reader.fieldnames
        except (UnicodeDecodeError, csv.Error):
            raise QueryParametersException(
                "Uploaded file is not a well-formed CSV file.")

        # an empty upload has no header row at all; DictReader then reports
        # None instead of a list of column names
        if fields is None:
            raise QueryParametersException(
                "Uploaded file is not a well-formed CSV file.")

        # check if all required fields are present
        required = ImportFromExternalTool.required_columns[platform]
        missing = [field for field in required if field not in fields]

        if missing:
            raise QueryParametersException(
                "The following required columns are not present in the csv file: %s"
                % ", ".join(missing))

        # separate the text wrapper from the uploaded file again so the
        # wrapper no longer owns the underlying stream
        wrapped_upload.detach()

        # return metadata - the filename is sanitised and serves no purpose at
        # this point in time, but can be used to uniquely identify a dataset
        disallowed_characters = re.compile(r"[^a-zA-Z0-9._+-]")
        return {
            "filename": disallowed_characters.sub("", file.filename),
            "time": time.time(),
            "datasource": platform,
            "board": "upload",
            "platform": platform
        }
예제 #4
0
    def validate_query(query, request, user):
        """
        Validate input for a dataset query on the 4chan data source.

        Will raise a QueryParametersException if invalid parameters are
        encountered. The ``daterange`` parameter is split into ``min_date``
        and ``max_date``; note that the passed ``query`` dictionary itself is
        modified and returned.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no search query was given, or
        only one of the two date range limits was set
        """
        # this is the bare minimum, else we can't narrow down the full data set
        if not query.get("body_match", None):
            raise QueryParametersException("Please provide a search query")

        # the date range arrives as a (start, end) pair; store it as two
        # separate parameters
        query["min_date"], query["max_date"] = query["daterange"]
        del query["daterange"]

        # both dates need to be set, or none - so reject the query if exactly
        # one of the two limits was given, whichever one it is
        if bool(query["min_date"]) != bool(query["max_date"]):
            raise QueryParametersException(
                "When setting a date range, please provide both an upper and lower limit."
            )

        return query
예제 #5
0
    def validate_query(query, request, user):
        """
        Check and normalise the parameters of an item search query.

        The search query is mandatory and may contain at most 5 items,
        separated by commas and/or newlines. All whitespace is removed from
        the query.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no query was given, or it
        contains more than 5 items
        """
        raw_query = query.get("query", "")
        if not raw_query.strip():
            raise QueryParametersException("You must provide a search query.")

        # newlines separate items just like commas do; after that, drop every
        # remaining bit of whitespace
        search_items = re.sub(r"\s+", "", raw_query.replace("\n", ","))

        # n separators means n + 1 items
        if search_items.count(",") >= 5:
            raise QueryParametersException(
                "You cannot query more than 5 items at a time.")

        scope = query.get("search_scope")
        safe_query = {}
        safe_query["items"] = query.get("max_posts")
        safe_query["query"] = search_items
        # pluralised scope; used in web interface
        safe_query["board"] = scope + "s"
        safe_query["search_scope"] = scope
        safe_query["scrape_comments"] = query.get("scrape_comments")
        return safe_query
예제 #6
0
    def validate_query(query, request, user):
        """
        Check the parameters of a dataset query on the Usenet data source.

        Unless the user is an admin or has the
        ``usenet.can_query_without_keyword`` privilege, the query needs a
        body match, a subject match or the ``random-sample`` search scope.
        The ``daterange`` parameter is split into ``min_date`` and
        ``max_date``; the passed ``query`` dictionary itself is modified and
        returned.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the query is too broad for this
        user, or only one of the two date range limits was set
        """
        # privileged users may query the full data set without narrowing it
        may_query_everything = user.is_admin() or user.get_value(
            "usenet.can_query_without_keyword", False)

        if not may_query_everything:
            narrows_dataset = (
                query.get("body_match", None)
                or query.get("subject_match", None)
                or query.get("search_scope", "") == "random-sample"
            )
            if not narrows_dataset:
                raise QueryParametersException(
                    "Please provide a body query, subject query or random sample size."
                )

        # the date range arrives as a (start, end) pair and is stored as two
        # separate parameters
        date_range = query.get("daterange")
        query["min_date"], query["max_date"] = date_range

        # a half-open range is not allowed: either both limits or neither
        limits_given = [bool(limit) for limit in date_range]
        if True in limits_given and False in limits_given:
            raise QueryParametersException(
                "When providing a date range, set both an upper and lower limit."
            )

        del query["daterange"]

        # nothing left to object to
        return query
예제 #7
0
    def validate_query(query, request, user):
        """
        Validate TikTok query input

        Checks the search scope, caps the number of posts to collect and
        turns the query into a comma-separated list of at most 5 items, each
        prefixed with a sigil indicating the search scope.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the search scope is invalid, no
        query was given, the number of posts is not a number, or more than 5
        items were given
        """

        # 'location' would be possible as well but apparently requires a login
        if query.get("search_scope",
                     "") not in ("hashtag", "username", "music"):
            raise QueryParametersException(
                "Invalid search scope: must be hashtag, username or music")

        # no query 4 u
        if not query.get("query", "").strip():
            raise QueryParametersException("You must provide a search query.")

        # 100 is mostly arbitrary - may need tweaking
        # admins and users with the 'allow_more_posts' privilege get a higher
        # ceiling
        max_posts = 100 if not user.get_value(
            "tiktok.allow_more_posts", False) and not user.is_admin() else 1000
        if query.get("max_posts", ""):
            try:
                max_posts = min(abs(int(query.get("max_posts"))), max_posts)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings and
                # TypeError for values it cannot convert at all
                raise QueryParametersException(
                    "Provide a valid number of posts to query.")

        # reformat queries to be a comma-separated list with no wrapping
        # whitespace; any sigils the user typed are removed as well, since
        # the correct one is added back below
        whitespace = re.compile(r"[@#\s]+")
        items = whitespace.sub("",
                               query.get("query").replace("\n",
                                                          ",")).split(",")

        if len(items) > 5:
            raise QueryParametersException(
                "You cannot query more than 5 items at a time.")

        # NOTE(review): the username sigil looks like a redacted placeholder
        # rather than a deliberate value - confirm what it should be
        sigil = {
            "hashtag": "#",
            "username": "******",
            "music": "🎶"
        }[query.get("search_scope")]
        items = ",".join([sigil + item for item in items if item])

        # simple!
        return {
            "items": max_posts,
            "query": items,
            "board": query.get("search_scope"),  # used in web interface
            "search_scope": query.get("search_scope")
        }
예제 #8
0
	def validate_query(query, request, user):
		"""
		Validate tag/blog query input

		Checks the search scope, turns the query into a list of at most five
		tags or blogs and converts the optional ``min_date`` and ``max_date``
		parameters (formatted as YYYY-MM-DD) to integer timestamps.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters
		:raises QueryParametersException:  If the search scope is invalid, a
		date cannot be parsed, or the query contains no items or more than five
		"""

		# 'location' would be possible as well but apparently requires a login
		if query.get("search_scope", "") not in ("tag", "blog"):
			raise QueryParametersException("Invalid search scope: must be tag or blog")

		# no query 4 u
		if not query.get("query", "").strip():
			raise QueryParametersException("You must provide a search query.")

		# reformat queries to be a list of items; strip *before* discarding
		# empty items, so whitespace-only entries do not end up in the list as
		# empty strings
		items = query.get("query").replace("\n", ",").replace("#", "").replace("\r", ",")
		items = [item.strip() for item in items.split(",")]
		items = [item for item in items if item]

		# convert both ends of the date range to timestamps, if provided
		timestamps = {}
		for parameter in ("max_date", "min_date"):
			if not query.get(parameter):
				timestamps[parameter] = None
				continue

			try:
				parsed_date = datetime.datetime.strptime(query.get(parameter, ""), "%Y-%m-%d")
			except (TypeError, ValueError):
				# reject malformed dates properly instead of crashing on them
				raise QueryParametersException("Please provide dates formatted as YYYY-MM-DD.")

			timestamps[parameter] = int(parsed_date.timestamp())

		before = timestamps["max_date"]
		after = timestamps["min_date"]

		# Not more than 5 plox
		if len(items) > 5:
			raise QueryParametersException("Only query for five or less tags or blogs.")
		# no query 4 u
		if not items:
			raise QueryParametersException("Invalid search search query.")

		# simple!
		return {
			"query": items,
			"board": query.get("search_scope") + "s",  # used in web interface
			"search_scope": query.get("search_scope"),
			"fetch_reblogs": bool(query.get("fetch_reblogs", False)),
			"before": before,
			"after": after
		}
예제 #9
0
    def validate_query(query, request, user):
        """
        Validate Telegram query

        Requires a search query, an authenticated session and API
        credentials. Caps the number of messages to collect and turns the
        query into a comma-separated list of at most 25 items without
        whitespace. The phone number, if present, is removed from the passed
        ``query`` dictionary and not included in the result.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the query, session or
        credentials are missing, the number of messages is not a number, or
        more than 25 items were given
        """
        # no query 4 u
        if not query.get("query", "").strip():
            raise QueryParametersException("You must provide a search query.")

        if not query.get("session", "").strip():
            raise QueryParametersException(
                "You need to authenticate with the Telegram API first.")

        if not query.get("api_id", None) or not query.get("api_hash", None):
            raise QueryParametersException(
                "You need to provide valid Telegram API credentials first.")

        if "api_phone" in query:
            del query["api_phone"]

        # 50000 is mostly arbitrary - may need tweaking
        max_posts = 50000
        if query.get("max_posts", ""):
            try:
                max_posts = min(abs(int(query.get("max_posts"))), max_posts)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings and
                # TypeError for values it cannot convert at all
                raise QueryParametersException(
                    "Provide a valid number of messages to query.")

        # reformat queries to be a comma-separated list with no wrapping
        # whitespace
        whitespace = re.compile(r"\s+")
        items = whitespace.sub("", query.get("query").replace("\n", ","))
        if len(items.split(",")) > 25:
            raise QueryParametersException(
                "You cannot query more than 25 items at a time.")

        # eliminate empty queries
        items = ",".join([item for item in items.split(",") if item])

        # simple!
        return {
            "items": max_posts,
            "query": items,
            "board": "",  # needed for web interface
            "scrape-userinfo": bool(query.get("scrape-userinfo", False)),
            "session": query.get("session"),
            "api_id": query.get("api_id"),
            "api_hash": query.get("api_hash")
        }
예제 #10
0
    def validate_query(query, request, user):
        """
        Validate Parler query input

        Requires a search query and the 'JST' and 'MST' values. Caps the
        number of posts to collect, turns the query into a comma-separated
        list of at most 15 items without whitespace and checks that the date
        range does not end before it starts. ``min_date`` and ``max_date``
        are also written to the passed ``query`` dictionary.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the query or the 'JST'/'MST'
        values are missing, the number of posts is not a number, more than
        15 items were given, or the date range is inverted
        """
        # no query 4 u
        if not query.get("query", "").strip():
            raise QueryParametersException("You must provide a search query.")

        if not query.get("jst") or not query.get("mst"):
            raise QueryParametersException(
                "You must provide the 'JST' and 'MST' values")

        # 2500 is mostly arbitrary - may need tweaking
        max_posts = 2500
        if query.get("max_posts", ""):
            try:
                max_posts = min(abs(int(query.get("max_posts"))), max_posts)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings and
                # TypeError for values it cannot convert at all
                raise QueryParametersException(
                    "Provide a valid number of posts to query.")

        # reformat queries to be a comma-separated list with no wrapping
        # whitespace
        whitespace = re.compile(r"\s+")
        items = whitespace.sub("", query.get("query").replace("\n", ","))
        if len(items.split(",")) > 15:
            raise QueryParametersException(
                "You cannot query more than 15 items at a time.")

        # the dates need to make sense as a range to search within; setting
        # only one of the two limits is allowed
        after, before = query.get("daterange")
        if before and after and before < after:
            raise QueryParametersException(
                "Date range must start before it ends")

        query["min_date"], query["max_date"] = (after, before)

        # simple!
        return {
            "items": max_posts,
            "query": items,
            "min_date": query.get("min_date", None),
            "max_date": query.get("max_date", None),
            "jst": query.get("jst"),
            "mst": query.get("mst"),
            "scrape_echoes": bool(query.get("scrape_echoes", False))
        }
예제 #11
0
	def validate_query(query, request, user):
		"""
		Validate input for a dataset query on the 4chan data source.

		Will raise a QueryParametersException if invalid parameters are
		encountered. Parameters that only apply to a search scope other than
		the chosen one are discarded. The ``daterange`` parameter is split into
		``min_date`` and ``max_date``; the passed ``query`` dictionary itself
		is modified and returned.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters
		:raises QueryParametersException:  If the query is too broad for this
		user
		"""

		# this is the bare minimum, else we can't narrow down the full data set
		# - unless the user is explicitly allowed to query without a keyword
		may_query_everything = user.is_admin() or user.get_value("4chan.can_query_without_keyword", False)
		if not may_query_everything \
				and not query.get("body_match", None) \
				and not query.get("subject_match", None) \
				and query.get("search_scope", "") != "random-sample":
			raise QueryParametersException("Please provide a message or subject search query")

		query["min_date"], query["max_date"] = query["daterange"]
		del query["daterange"]

		# scope-specific parameters are only kept for the scope they belong to.
		# pop() rather than del, so a query that never contained them to begin
		# with does not fail with a KeyError
		if query.get("search_scope") not in ("dense-threads",):
			query.pop("scope_density", None)
			query.pop("scope_length", None)

		if query.get("search_scope") not in ("match-ids",):
			query.pop("valid_ids", None)

		return query
예제 #12
0
    def validate_query(query, request, user):
        """
        Validate hashtag/username query input

        Checks the search scope, caps the number of posts to collect and
        turns the query into a comma-separated list of at most 5 items
        without whitespace.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the search scope is invalid, no
        query was given, the number of posts is not a number, or more than 5
        items were given
        """

        # 'location' would be possible as well but apparently requires a login
        if query.get("search_scope", "") not in ("hashtag", "username"):
            raise QueryParametersException(
                "Invalid search scope: must be hashtag or username")

        # no query 4 u
        if not query.get("query", "").strip():
            raise QueryParametersException("You must provide a search query.")

        # 2500 is mostly arbitrary - may need tweaking
        max_posts = 2500
        if query.get("max_posts", ""):
            try:
                max_posts = min(abs(int(query.get("max_posts"))), max_posts)
            except (TypeError, ValueError):
                # int() raises ValueError for non-numeric strings and
                # TypeError for values it cannot convert at all
                raise QueryParametersException(
                    "Provide a valid number of posts to query.")

        # reformat queries to be a comma-separated list with no wrapping
        # whitespace
        whitespace = re.compile(r"\s+")
        items = whitespace.sub("", query.get("query").replace("\n", ","))
        if len(items.split(",")) > 5:
            raise QueryParametersException(
                "You cannot query more than 5 items at a time.")

        # simple!
        return {
            "items": max_posts,
            "query": items,
            "board": query.get("search_scope") + "s",  # used in web interface
            "search_scope": query.get("search_scope"),
            "scrape_comments": bool(query.get("scrape_comments", False))
        }
    def validate_query(query, request, user):
        """
        Validate input for a dataset query on a corpus-based data source.

        Will raise a QueryParametersException if invalid parameters are
        encountered. A body or subject match, a valid corpus and a complete
        date range of at most six months are all required. Parameters whose
        name ends in ``_proxy`` are form placeholders and are left out of the
        result.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the search query, corpus or
        date range is missing or invalid
        """
        # this is the bare minimum, else we can't narrow down the full data set
        if not query.get("body_match", None) and not query.get("subject_match", None):
            raise QueryParametersException("Please provide a search query")

        if query.get("corpus") not in ("deu", "gbr"):
            raise QueryParametersException("Please choose a valid corpus to search within")

        # both dates need to be set, or none
        if query.get("min_date", None) and not query.get("max_date", None):
            raise QueryParametersException("When setting a date range, please provide both an upper and lower limit.")

        # the dates need to make sense as a range to search within
        if query.get("min_date", None) and query.get("max_date", None):
            try:
                before = int(query.get("max_date", ""))
                after = int(query.get("min_date", ""))
            except (TypeError, ValueError):
                raise QueryParametersException("Please provide valid dates for the date range.")

            # 946684800 is the timestamp of 1 January 2000, 00:00 UTC
            if after < 946684800:
                raise QueryParametersException("Please provide valid dates for the date range.")

            if before < after:
                raise QueryParametersException(
                    "Please provide a valid date range where the start is before the end of the range.")

            # the length of the range is end minus start; the other way
            # around is never positive here, which would disable this check
            if before - after > (6 * 86400 * 30.25):
                raise QueryParametersException("The date range for this query can span 6 months at most.")

            query["min_date"] = after
            query["max_date"] = before
        else:
            raise QueryParametersException("You need to provide a date range for your query")

        # leave out form placeholder fields
        is_placeholder = re.compile("_proxy$")
        filtered_query = {field: query[field] for field in query if not is_placeholder.search(field)}

        # if we made it this far, the query can be executed
        return filtered_query
예제 #14
0
	def validate_query(query, request, user):
		"""
		Validate tag query input

		Turns the query, which contains one item per line, into a
		comma-separated list of at most ten items, and splits the ``daterange``
		parameter into ``min_date`` and ``max_date``. The passed ``query``
		dictionary itself is modified and returned.

		:param dict query:  Query parameters, from client-side.
		:param request:  	Flask request
		:param User user:  	User object of user who has submitted the query
		:return dict:  		Safe query parameters
		:raises QueryParametersException:  If the query contains no items or
		more than ten, or only one of the two date range limits was set
		"""
		# no query 4 u
		if not query.get("query", "").strip():
			raise QueryParametersException("You must provide a search query.")

		# reformat queries to be a list of items, one per line; strip them and
		# discard empty ones *before* checking how many there are, so blank
		# lines do not count and a query consisting of only "#" is caught
		items = [item.strip() for item in query.get("query").replace("#", "").split("\n")]
		items = [item for item in items if item]

		# Not more than 10 plox
		if len(items) > 10:
			raise QueryParametersException("Only query for ten or less tags or blogs." + str(len(items)))

		# no query 4 u
		if not items:
			raise QueryParametersException("Search query cannot be empty.")

		# So it shows nicely in the frontend.
		items = ", ".join(items)

		# the dates need to make sense as a range to search within
		query["min_date"], query["max_date"] = query.get("daterange")
		if any(query.get("daterange")) and not all(query.get("daterange")):
			raise QueryParametersException("When providing a date range, set both an upper and lower limit.")

		del query["daterange"]

		query["query"] = items
		query["board"] = query.get("search_scope") + "s"  # used in web interface

		# if we made it this far, the query can be executed
		return query
예제 #15
0
    def validate_query(query, request, user):
        """
        Check the parameters of a dataset query on the Twitter data source.

        A query of at most 1024 characters and a bearer token are required.
        The date range may be left open at either end, but may not end
        before it starts.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the query or bearer token is
        missing, the query is too long, or the date range is inverted
        """
        search_query = query.get("query", None)
        if not search_query:
            # without a query there is nothing to narrow down the data with
            raise QueryParametersException("Please provide a query.")

        bearer_token = query.get("api_bearer_token", None)
        if not bearer_token:
            raise QueryParametersException(
                "Please provide a valid bearer token.")

        if len(search_query) > 1024:
            raise QueryParametersException(
                "Twitter API queries cannot be longer than 1024 characters.")

        # unlike elsewhere, a range with only a start or only an end is fine
        # here; only a range with both limits can be inverted
        min_date, max_date = query.get("daterange")
        if max_date and min_date and max_date < min_date:
            raise QueryParametersException(
                "Date range must start before it ends")

        # nothing left to object to
        safe_query = {}
        safe_query["query"] = search_query
        safe_query["api_bearer_token"] = bearer_token
        safe_query["min_date"] = min_date
        safe_query["max_date"] = max_date
        safe_query["amount"] = query.get("amount")
        return safe_query
예제 #16
0
    def validate_query(query, request, user):
        """
        Check and normalise the parameters of a Telegram query.

        Requires a search query and a full set of API credentials (ID, hash
        and phone number). The query is turned into a comma-separated list of
        at most 25 items without whitespace or empty items.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If the query or any of the
        credentials is missing, or more than 25 items were given
        """
        raw_query = query.get("query", "")
        if not raw_query.strip():
            raise QueryParametersException("You must provide a search query.")

        # all three credentials are needed
        credentials = [query.get(name, None) for name in ("api_id", "api_hash", "api_phone")]
        if not all(credentials):
            raise QueryParametersException(
                "You need to provide valid Telegram API credentials first.")

        # newlines separate items just like commas do; after that, drop every
        # remaining bit of whitespace
        separated = re.sub(r"\s+", "", raw_query.replace("\n", ","))
        candidates = separated.split(",")

        # the limit applies before empty items are discarded
        if len(candidates) > 25:
            raise QueryParametersException(
                "You cannot query more than 25 items at a time.")

        search_items = ",".join(filter(None, candidates))

        safe_query = {}
        safe_query["items"] = query.get("max_posts")
        safe_query["query"] = search_items
        safe_query["board"] = ""  # needed for web interface
        safe_query["api_id"] = query.get("api_id")
        safe_query["api_hash"] = query.get("api_hash")
        safe_query["api_phone"] = query.get("api_phone")
        return safe_query
예제 #17
0
	def validate_query(query, request, user):
		"""
		Check the parameters of a dataset query on the Guardian data source.

		A body or subject match and a complete date range spanning at most six
		months are required. For the ``dense-threads`` search scope, the
		density and length parameters are checked as well. The ``daterange``
		parameter is split into ``min_date`` and ``max_date``; the passed
		``query`` dictionary itself is modified and returned.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters
		:raises QueryParametersException:  If the search query or date range
		is missing or invalid, or the dense thread parameters are out of range
		"""
		# without any keyword there is nothing to narrow down the data set with
		if not (query.get("body_match", None) or query.get("subject_match", None)):
			raise QueryParametersException("Please provide a search query")

		# dense threads need a density percentage and a minimum thread length;
		# other scopes have no further requirements here
		if query.get("search_scope", "") == "dense-threads":
			try:
				density = int(query.get("scope_density", ""))
			except ValueError:
				raise QueryParametersException("Please provide a valid numerical density percentage.")

			if not 15 <= density <= 100:
				raise QueryParametersException("Please provide a density percentage between 15 and 100.")

			try:
				thread_length = int(query.get("scope_length", ""))
			except ValueError:
				raise QueryParametersException("Please provide a valid numerical dense thread length.")

			if thread_length < 30:
				raise QueryParametersException("Please provide a dense thread length of at least 30.")

		# both ends of the date range are mandatory
		date_range = query.get("daterange")
		if not all(date_range):
			raise QueryParametersException("You must provide a date range")

		query["min_date"], query["max_date"] = date_range
		del query["daterange"]

		# six months, counted as 31 days each, in seconds
		longest_span = 86400 * 31 * 6
		if query["max_date"] and (query["max_date"] - query["min_date"]) > longest_span:
			raise QueryParametersException("Date range may span 6 months at most")

		# nothing left to object to
		return query
예제 #18
0
파일: user_input.py 프로젝트: p-charis/4cat
    def parse_value(settings, choice, silently_correct=True):
        """
        Filter user input

        Makes sure user input for post-processors is valid and within the
        parameters specified by the post-processor. How the input is parsed
        depends on the option type given as ``settings["type"]``; input for
        types that are not handled explicitly is returned as is.

        :param obj settings:  Settings, including defaults and valid options
        :param choice:  The chosen option, to be parsed
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid. Only the single-choice and text option types check this
        parameter.

        :return:  Validated and parsed input
        :raises QueryParametersException:  If ``silently_correct`` is false
        and the input for a single-choice or text option is invalid
        """
        input_type = settings.get("type", "")
        if input_type in (UserInput.OPTION_INFO, UserInput.OPTION_DIVIDER):
            # these are structural form elements and can never return a value
            return None

        elif input_type == UserInput.OPTION_TOGGLE:
            # simple boolean toggle: any value at all, even an empty one,
            # counts as 'on'
            return choice is not None

        elif input_type in (UserInput.OPTION_DATE, UserInput.OPTION_DATERANGE):
            # parse either integers (unix timestamps) or try to guess the date
            # format (the latter may be used for input if JavaScript is turned
            # off in the front-end and the input comes from there)
            value = None
            try:
                value = int(choice)
            except ValueError:
                parsed_choice = parse_datetime(choice)
                value = int(parsed_choice.timestamp())
            finally:
                # returning from `finally` discards any exception that is
                # still propagating (e.g. a TypeError from int(), or anything
                # raised while parsing the date), so input that cannot be
                # parsed results in None rather than an error
                return value

        elif input_type == UserInput.OPTION_MULTI:
            # any number of values out of a list of possible values
            # comma-separated during input, returned as a list of valid options
            if not choice:
                return settings.get("default", [])

            # values that are not a known option are dropped without notice
            chosen = choice.split(",")
            return [item for item in chosen if item in settings.get("options", [])]

        elif input_type == UserInput.OPTION_CHOICE:
            # select box
            # one out of multiple options
            # return option if valid, or default
            if choice not in settings.get("options"):
                if not silently_correct:
                    raise QueryParametersException("Invalid value selected; must be one of %s." % ", ".join(settings.get("options", {}).keys()))
                else:
                    return settings.get("default", "")
            else:
                return choice

        elif input_type in (UserInput.OPTION_TEXT, UserInput.OPTION_TEXT_LARGE):
            # text string
            # optionally clamp it as an integer; return default if not a valid
            # integer
            if "max" in settings:
                try:
                    choice = min(settings["max"], int(choice))
                except (ValueError, TypeError) as e:
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %i or lower." % settings["max"])

                    choice = settings.get("default")

            # note that if the value was replaced by the default above, it is
            # the default that is converted and clamped here
            if "min" in settings:
                try:
                    choice = max(settings["min"], int(choice))
                except (ValueError, TypeError) as e:
                    if not silently_correct:
                        raise QueryParametersException("Provide a value of %i or more." % settings["min"])

                    choice = settings.get("default")

            # empty input is replaced with the default
            if choice is None or choice == "":
                choice = settings.get("default")

            # no default either: fall back to 0 for numeric fields (those
            # with a minimum or maximum) and to an empty string otherwise
            if choice is None:
                choice = 0 if "min" in settings or "max" in settings else ""

            return choice

        else:
            # no filtering
            return choice
예제 #19
0
    def validate_query(query, request, user):
        """
        Validate input for a dataset query on the 4chan data source.

        Will raise a QueryParametersException if invalid parameters are
        encountered. Mutually exclusive parameters may also be sanitised by
        ignoring either of the mutually exclusive options. Note that `query`
        itself is modified while it is being sanitised.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters: every item of `query` whose key
        is not a `_proxy` placeholder
        :raises QueryParametersException:  If a parameter is missing or invalid
        """
        body_match = query.get("body_match", None)
        subject_match = query.get("subject_match", None)
        search_scope = query.get("search_scope", "")

        # this is the bare minimum, else we can't narrow down the full data set
        # (admins and users with the relevant privilege are exempt)
        if not user.is_admin() and not user.get_value("4chan.can_query_without_keyword", False) \
                and not body_match and not subject_match and search_scope != "random-sample":
            raise QueryParametersException(
                "Please provide a body query, subject query or random sample size.")

        # Make sure to accept only a body or subject match: if just one of the
        # two is given, the other one is explicitly set to an empty string
        if subject_match and not body_match:
            query["body_match"] = ""
        elif body_match and not subject_match:
            query["subject_match"] = ""

        # body query and full threads are incompatible, returning too many posts
        # in most cases
        if body_match:
            query.pop("full_threads", None)

        # random sample requires a sample size, and is additionally incompatible
        # with full threads
        if search_scope == "random-sample":
            try:
                sample_size = int(query.get("random_amount", 0))
            except (ValueError, TypeError):
                # TypeError covers a value of None, which int() cannot parse
                raise QueryParametersException(
                    "Please provide a valid numerical sample size.")

            if sample_size < 1 or sample_size > 100000:
                raise QueryParametersException(
                    "Please provide a sample size between 1 and 100000.")

            query.pop("full_threads", None)

        # only one of two dense threads options may be chosen at the same time, and
        # it requires valid density and length parameters. full threads is implied,
        # so it is otherwise left alone here
        if search_scope == "dense-threads":
            try:
                dense_density = int(query.get("scope_density", ""))
            except (ValueError, TypeError):
                raise QueryParametersException(
                    "Please provide a valid numerical density percentage.")

            if dense_density < 15 or dense_density > 100:
                raise QueryParametersException(
                    "Please provide a density percentage between 15 and 100.")

            try:
                dense_length = int(query.get("scope_length", ""))
            except (ValueError, TypeError):
                raise QueryParametersException(
                    "Please provide a valid numerical dense thread length.")

            if dense_length < 30:
                raise QueryParametersException(
                    "Please provide a dense thread length of at least 30.")

        # both dates need to be set, or none - so a lone upper limit is
        # rejected just like a lone lower limit
        min_date = query.get("min_date", None)
        max_date = query.get("max_date", None)
        if bool(min_date) != bool(max_date):
            raise QueryParametersException(
                "When setting a date range, please provide both an upper and lower limit."
            )

        # the dates need to make sense as a range to search within
        if min_date and max_date:
            try:
                before = int(max_date)
                after = int(min_date)
            except (ValueError, TypeError):
                raise QueryParametersException(
                    "Please provide valid dates for the date range.")

            if before < after:
                raise QueryParametersException(
                    "Please provide a valid date range where the start is before the end of the range."
                )

            # store the dates as integers from here on
            query["min_date"] = after
            query["max_date"] = before

        # placeholder (*_proxy) fields are not part of the actual query
        is_placeholder = re.compile("_proxy$")

        # if we made it this far, the query can be executed
        return {field: value for field, value in query.items() if not is_placeholder.search(field)}
예제 #20
0
	def validate_query(query, request, user):
		"""
		Validate input for a tag or blog query

		Checks the search scope, splits the query into at most five separate
		items and converts the optional date limits to unix timestamps.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters
		:raises QueryParametersException:  If the scope, the query or one of
		the dates is invalid
		"""
		search_scope = query.get("search_scope", "")
		if search_scope not in ("tag", "blog"):
			raise QueryParametersException("Invalid search scope: must be tag or blog")

		# no query 4 u
		raw_query = query.get("query", "")
		if not raw_query.strip():
			raise QueryParametersException("You must provide a search query.")

		# reformat queries to be a list of items: line breaks count as
		# separators and hash signs are dropped. Items are stripped *before*
		# empty ones are discarded, so whitespace-only items do not survive
		items = raw_query.replace("\n", ",").replace("#", "").replace("\r", ",").split(",")
		items = [item.strip() for item in items]
		items = [item for item in items if item]

		# On some OSes, the date is submitted as dd-mm-yyyy. Make sure to also fetch these.
		ddmmyyyy = r"^([0-2][0-9]|(3)[0-1])(-)(((0)[0-9])|((1)[0-2]))(-)\d{4}$"

		def parse_date(field, label):
			"""
			Convert the date in `query[field]` to a unix timestamp, or `None`
			if no date was given
			"""
			value = query.get(field)
			if not value:
				return None

			date_format = "%d-%m-%Y" if re.match(ddmmyyyy, value) else "%Y-%m-%d"
			try:
				return int(datetime.datetime.strptime(value, date_format).timestamp())
			except ValueError:
				raise QueryParametersException("Invalid value for %s %s " % (label, str(value)))

		# Set dates, if given.
		before = parse_date("max_date", "max date")
		after = parse_date("min_date", "min date")

		# Not more than 5 plox
		if len(items) > 5:
			raise QueryParametersException("Only query for five or less tags or blogs.")

		# no query 4 u
		if not items:
			raise QueryParametersException("Invalid search query.")

		# simple!
		return {
			"query": items,
			"board": search_scope + "s",  # used in web interface
			"search_scope": search_scope,
			"fetch_reblogs": bool(query.get("fetch_reblogs", False)),
			"before": before,
			"after": after
		}
예제 #21
0
    def validate_query(query, request, user):
        """
        Validate input for a dataset query on the Reddit data source.

        Will raise a QueryParametersException if invalid parameters are
        encountered. Mutually exclusive parameters may also be sanitised by
        ignoring either of the mutually exclusive options. Note that `query`
        itself is modified while it is being sanitised.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters: every item of `query` whose key
        is not a `_proxy` placeholder
        :raises QueryParametersException:  If a parameter is missing or invalid
        """
        # we need a board! surrounding whitespace is stripped first, so that
        # the leading r/ is also recognised in a list like "r/a, r/b"
        r_prefix = re.compile(r"^/?r/")
        boards = [r_prefix.sub("", board.strip()) for board in query.get("board", "").split(",")]
        boards = [board for board in boards if board]

        if not boards:
            raise QueryParametersException(
                "Please provide a board or a comma-separated list of boards to query."
            )

        # ignore leading r/ for boards
        query["board"] = ",".join(boards)

        # this is the bare minimum, else we can't narrow down the full data set
        if not user.is_admin() and not user.get_value(
                "reddit.can_query_without_keyword", False) and not query.get(
                    "body_match", "").strip() and not query.get(
                        "subject_match", "").strip():
            raise QueryParametersException(
                "Please provide a body query or subject query.")

        # body query and full threads are incompatible, returning too many posts
        # in most cases
        if query.get("body_match", None):
            query.pop("full_threads", None)

        # Make sure no body or subject searches starting with just a minus sign are possible, e.g. "-Trump"
        # Splitting on any whitespace ignores empty terms, so stray spaces
        # cannot be used to get around this check
        search_terms = []
        for field in ("body_match", "subject_match"):
            if query.get(field, None):
                search_terms += query[field].split()

        if search_terms and all(term.startswith("-") for term in search_terms):
            raise QueryParametersException(
                "Please provide body queries that do not start with a minus sign."
            )

        # only one of two dense threads options may be chosen at the same time, and
        # it requires valid density and length parameters. full threads is implied,
        # so it is otherwise left alone here
        if query.get("search_scope", "") == "dense-threads":
            try:
                dense_density = int(query.get("scope_density", ""))
            except (ValueError, TypeError):
                # TypeError covers a value of None, which int() cannot parse
                raise QueryParametersException(
                    "Please provide a valid numerical density percentage.")

            if dense_density < 15 or dense_density > 100:
                raise QueryParametersException(
                    "Please provide a density percentage between 15 and 100.")

            try:
                dense_length = int(query.get("scope_length", ""))
            except (ValueError, TypeError):
                raise QueryParametersException(
                    "Please provide a valid numerical dense thread length.")

            if dense_length < 30:
                raise QueryParametersException(
                    "Please provide a dense thread length of at least 30.")

        # both dates need to be set, or none - so a lone upper limit is
        # rejected just like a lone lower limit
        min_date = query.get("min_date", None)
        max_date = query.get("max_date", None)
        if bool(min_date) != bool(max_date):
            raise QueryParametersException(
                "When setting a date range, please provide both an upper and lower limit."
            )

        # the dates need to make sense as a range to search within
        if min_date and max_date:
            try:
                before = int(max_date)
                after = int(min_date)
            except (ValueError, TypeError):
                raise QueryParametersException(
                    "Please provide valid dates for the date range.")

            if before < after:
                raise QueryParametersException(
                    "Please provide a valid date range where the start is before the end of the range."
                )

            # store the dates as integers from here on
            query["min_date"] = after
            query["max_date"] = before

        # placeholder (*_proxy) fields are not part of the actual query
        is_placeholder = re.compile("_proxy$")

        # if we made it this far, the query can be executed
        return {field: value for field, value in query.items() if not is_placeholder.search(field)}
예제 #22
0
    def validate_query(query, request, user):
        """
        Validate custom data input

        Confirms that the uploaded file is a valid CSV or tab file and, if so,
        returns some metadata.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request; the uploaded file is read from
        `request.files["data_upload"]`
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no file was uploaded, the file
        cannot be read as CSV, required columns are missing, or the timestamp
        of the first row cannot be parsed
        """

        # do we have an uploaded file?
        if "data_upload" not in request.files:
            raise QueryParametersException("No file was offered for upload.")

        file = request.files["data_upload"]
        if not file:
            raise QueryParametersException("No file was offered for upload.")

        encoding = SearchCustom.sniff_encoding(file)
        wrapped_file = io.TextIOWrapper(file, encoding=encoding)

        try:
            try:
                # guess the CSV dialect from the first part of the file; both
                # decoding and sniffing may fail on files that are not CSV
                sample = wrapped_file.read(1024 * 1024)
                wrapped_file.seek(0)
                dialect = csv.Sniffer().sniff(sample, delimiters=(",", ";", "\t"))

                reader = csv.DictReader(wrapped_file, dialect=dialect)

                # reading the field names parses the header row; there are no
                # field names at all if the file is empty
                fields = reader.fieldnames or []
            except (UnicodeDecodeError, csv.Error):
                raise QueryParametersException(
                    "Uploaded file is not a well-formed CSV or TAB file.")

            # check if all required fields are present
            required = ("id", "thread_id", "subject", "author", "body",
                        "timestamp")
            missing = [field for field in required if field not in fields]

            if missing:
                raise QueryParametersException(
                    "The following required columns are not present in the csv file: %s"
                    % ", ".join(missing))

            # only the first data row (if any) is used to check the
            # timestamp format
            try:
                first_row = next(reader, None)
            except (UnicodeDecodeError, csv.Error):
                raise QueryParametersException(
                    "Uploaded file is not a well-formed CSV or TAB file.")

            if first_row is not None:
                try:
                    parse_datetime(first_row["timestamp"])
                except (ValueError, TypeError, OverflowError):
                    # TypeError covers a missing value (None) in a short row
                    raise QueryParametersException(
                        "Your 'timestamp' column does not use a recognisable format (yyyy-mm-dd hh:mm:ss is recommended)"
                    )
        finally:
            # detach the text wrapper, so that discarding it does not close
            # the underlying upload - also when validation fails
            wrapped_file.detach()

        # Whether to strip the HTML tags
        strip_html = bool(query.get("strip_html"))

        # return metadata - the filename is sanitised and serves no purpose at
        # this point in time, but can be used to uniquely identify a dataset
        disallowed_characters = re.compile(r"[^a-zA-Z0-9._+-]")
        return {
            "filename": disallowed_characters.sub("", file.filename),
            "time": time.time(),
            "datasource": "custom",
            "board": "upload",
            "strip_html": strip_html
        }
예제 #23
0
	def validate_query(query, request, user):
		"""
		Validate Instagram query input

		Besides checking the query itself, this logs in to Instagram with the
		provided username and password to verify them. The login details are
		returned in encrypted form as part of the safe query parameters.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters
		:raises QueryParametersException:  If the scope, query, login details
		or amount of posts are invalid
		"""

		# 'location' would be possible as well but apparently requires a login
		if query.get("search_scope", "") not in ("hashtag", "username"):
			raise QueryParametersException("Invalid search scope: must be hashtag or username")

		# no query 4 u
		if not query.get("query", "").strip():
			raise QueryParametersException("You must provide a search query.")

		# missing (None) and blank login details are both rejected here; the
		# values themselves are passed on as they were submitted
		username = query.get("username")
		password = query.get("password")
		if not (username or "").strip() or not (password or "").strip():
			raise QueryParametersException("You need to provide a username and password")

		# try logging in to see if the login details actually work
		login_tester = instaloader.Instaloader()
		try:
			login_tester.login(username, password)
		except instaloader.TwoFactorAuthRequiredException:
			raise QueryParametersException(
				"Two-factor authentication with Instagram is not available via 4CAT at this time. Disable it for your Instagram account and try again.")
		except (instaloader.InvalidArgumentException, instaloader.BadCredentialsException):
			raise QueryParametersException("Invalid Instagram username or password.")

		# there are some fundamental limits to how safe we can make this, but
		# we can at least encrypt it so that if someone has access to the
		# database but not the 4CAT config file, they cannot use the login
		# details
		# we use the 4CAT anonymisation salt (which *should* be a long,
		# random string)
		# making sure the 4CAT config file is kept safe is left as an exercise
		# for the reader...
		key = SearchInstagram.salt_to_fernet_key()
		fernet = Fernet(key)
		obfuscated_login = fernet.encrypt(json.dumps([username, password]).encode("utf-8"))

		# the upper limit of 2500 is mostly arbitrary - may need tweaking
		max_posts = 2500
		if query.get("max_posts", ""):
			try:
				max_posts = min(abs(int(query.get("max_posts"))), max_posts)
			except (TypeError, ValueError):
				# int() raises ValueError for non-numeric strings, and
				# TypeError for values that are not strings or numbers
				raise QueryParametersException("Provide a valid number of posts to query.")

		# reformat queries to be a comma-separated list with no wrapping
		# whitespace
		whitespace = re.compile(r"\s+")
		items = whitespace.sub("", query.get("query").replace("\n", ","))
		if len(items.split(",")) > 5:
			raise QueryParametersException("You cannot query more than 5 items at a time.")

		# simple!
		return {
			"login": obfuscated_login.decode("utf-8"),
			"items": max_posts,
			"query": items,
			"board": query.get("search_scope") + "s",  # used in web interface
			"search_scope": query.get("search_scope"),
			"scrape_comments": bool(query.get("scrape_comments", False))
		}
예제 #24
0
	def validate_query(query, request, user):
		"""
		Validate input for a dataset query that requires a date range.

		Will raise a QueryParametersException if invalid parameters are
		encountered. A search query and a date range of at most six months
		are mandatory. Note that `query` itself is modified while it is
		being sanitised.

		:param dict query:  Query parameters, from client-side.
		:param request:  Flask request
		:param User user:  User object of user who has submitted the query
		:return dict:  Safe query parameters: every item of `query` whose key
		is not a `_proxy` placeholder
		:raises QueryParametersException:  If a parameter is missing or invalid
		"""
		# this is the bare minimum, else we can't narrow down the full data set
		if not query.get("body_match", None) and not query.get("subject_match", None):
			raise QueryParametersException("Please provide a search query")

		# only one of two dense threads options may be chosen at the same time, and
		# it requires valid density and length parameters. full threads is implied,
		# so it is otherwise left alone here
		if query.get("search_scope", "") == "dense-threads":
			try:
				dense_density = int(query.get("scope_density", ""))
			except (ValueError, TypeError):
				# TypeError covers a value of None, which int() cannot parse
				raise QueryParametersException("Please provide a valid numerical density percentage.")

			if dense_density < 15 or dense_density > 100:
				raise QueryParametersException("Please provide a density percentage between 15 and 100.")

			try:
				dense_length = int(query.get("scope_length", ""))
			except (ValueError, TypeError):
				raise QueryParametersException("Please provide a valid numerical dense thread length.")

			if dense_length < 30:
				raise QueryParametersException("Please provide a dense thread length of at least 30.")

		min_date = query.get("min_date", None)
		max_date = query.get("max_date", None)

		# both dates need to be set, or none
		if min_date and not max_date:
			raise QueryParametersException("When setting a date range, please provide both an upper and lower limit.")

		# a date range is mandatory here
		if not (min_date and max_date):
			raise QueryParametersException("You need to provide a date range for your query")

		# the dates need to make sense as a range to search within
		try:
			before = int(max_date)
			after = int(min_date)
		except (ValueError, TypeError):
			raise QueryParametersException("Please provide valid dates for the date range.")

		# 946684800 is 1 January 2000 (UTC); earlier start dates are rejected
		if after < 946684800:
			raise QueryParametersException("Please provide valid dates for the date range.")

		if before < after:
			raise QueryParametersException(
				"Please provide a valid date range where the start is before the end of the range.")

		# the length of the range is end minus start; a month is counted as
		# 30.25 days
		if before - after > (6 * 86400 * 30.25):
			raise QueryParametersException("The date range for this query can span 6 months at most.")

		# store the dates as integers from here on
		query["min_date"] = after
		query["max_date"] = before

		# placeholder (*_proxy) fields are not part of the actual query
		is_placeholder = re.compile("_proxy$")

		# if we made it this far, the query can be executed
		return {field: value for field, value in query.items() if not is_placeholder.search(field)}
예제 #25
0
파일: user_input.py 프로젝트: p-charis/4cat
    def parse_all(options, input, silently_correct=True):
        """
        Sanitise submitted form values against a set of option definitions

        Only fields that belong to one of the defined options end up in the
        result; any other input is ignored. Options that are missing from the
        input get their default value, or `None` if no default is defined.
        The result is therefore a dictionary with 1) only white-listed keys
        and 2) a parsed value for each of those keys.

        :param dict options:  Options, as a name -> settings dictionary
        :param dict input:  Input, as a form field -> value dictionary
        :param bool silently_correct:  If true, replace invalid values with the
        given default value; else, raise a QueryParametersException if a value
        is invalid.

        :return dict:  Sanitised form input
        """
        # the web interface submits every parameter as option-[parameter ID];
        # drop that prefix so fields can be looked up by option name
        form = {}
        for field in input:
            form[re.sub(r"^option-", "", field)] = input[field]

        sanitised = {}
        for name, settings in options.items():
            option_type = settings.get("type")

            # structural form elements never have a value
            if option_type in (UserInput.OPTION_DIVIDER, UserInput.OPTION_INFO):
                continue

            # a date range is made up of two separate inputs
            if option_type == UserInput.OPTION_DATERANGE:
                field_min = name + "-min"
                field_max = name + "-max"

                # the client normally fills these in, but if that did not
                # happen, fall back on the corresponding *_proxy fields
                if field_min not in form or form.get(field_min) == "-1":
                    field_min += "_proxy"

                if field_max not in form or form.get(field_max) == "-1":
                    field_max += "_proxy"

                after = UserInput.parse_value(settings, form.get(field_min), silently_correct)
                before = UserInput.parse_value(settings, form.get(field_max), silently_correct)

                if before and after and after > before:
                    if not silently_correct:
                        raise QueryParametersException("End of date range must be after beginning of date range.")
                    before = after

                # stored as an (after, before) tuple
                sanitised[name] = (after, before)
                continue

            # an unchecked checkbox is simply absent from the input
            if option_type == UserInput.OPTION_TOGGLE:
                sanitised[name] = name in form
                continue

            if name in form:
                # normal parsing and sanitisation
                sanitised[name] = UserInput.parse_value(settings, form[name], silently_correct)
            else:
                # not provided? use default
                sanitised[name] = settings.get("default", None)

        return sanitised
예제 #26
0
    def validate_query(query, request, user):
        """
        Validate input for a dataset query on the Reddit data source.

        Will raise a QueryParametersException if invalid parameters are
        encountered. Mutually exclusive parameters may also be sanitised by
        ignoring either of the mutually exclusive options. Note that `query`
        is modified in place, and is itself the dictionary that is returned.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If a parameter is missing or invalid
        """
        # we need a board! surrounding whitespace is stripped first, so that
        # the leading r/ is also recognised in a list like "r/a, r/b"
        r_prefix = re.compile(r"^/?r/")
        boards = [r_prefix.sub("", board.strip()) for board in query.get("board", "").split(",")]
        boards = [board for board in boards if board]

        if not boards:
            raise QueryParametersException(
                "Please provide a board or a comma-separated list of boards to query."
            )

        # ignore leading r/ for boards
        query["board"] = ",".join(boards)

        # this is the bare minimum, else we can't narrow down the full data set
        if not user.is_admin() and not user.get_value(
                "reddit.can_query_without_keyword", False) and not query.get(
                    "body_match", "").strip() and not query.get(
                        "subject_match", "").strip():
            raise QueryParametersException(
                "Please provide a body query or subject query.")

        # body query and full threads are incompatible, returning too many posts
        # in most cases
        if query.get("body_match", None):
            query.pop("full_threads", None)

        # Make sure no body or subject searches starting with just a minus sign are possible, e.g. "-Trump"
        # Splitting on any whitespace ignores empty terms, so stray spaces
        # cannot be used to get around this check
        search_terms = []
        for field in ("body_match", "subject_match"):
            if query.get(field, None):
                search_terms += query[field].split()

        if search_terms and all(term.startswith("-") for term in search_terms):
            raise QueryParametersException(
                "Please provide body queries that do not start with a minus sign."
            )

        # both dates need to be set, or none. This looks at the min_date and
        # max_date values as submitted, before they are replaced below
        if query.get("min_date", None) and not query.get("max_date", None):
            raise QueryParametersException(
                "When setting a date range, please provide both an upper and lower limit."
            )

        # the date range is stored as two separate parameters; a missing
        # range counts as "no limits"
        query["min_date"], query["max_date"] = query.pop("daterange", None) or (None, None)

        if "*" in query.get("body_match", "") and not user.get_value(
                "reddit.can_query_without_keyword", False):
            raise QueryParametersException(
                "Wildcard queries are not allowed as they typically return too many results to properly process."
            )

        if "*" in query.get("board", "") and not user.get_value(
                "reddit.can_query_without_keyword", False):
            raise QueryParametersException(
                "Wildcards are not allowed for boards as this typically returns too many results to properly process."
            )

        # the dense thread parameters are only kept for the dense threads
        # scope; they may not have been submitted at all
        if query.get("search_scope") not in ("dense-threads", ):
            query.pop("scope_density", None)
            query.pop("scope_length", None)

        # if we made it this far, the query can be executed
        return query
예제 #27
0
    def validate_query(query, request, user):
        """
        Validate custom data input

        Confirms that the uploaded file is a valid CSV or tab file and, if so,
        returns some metadata. Files with a name ending in `.tab` are read as
        tab-separated files without quoting, all other files as CSV files.

        :param dict query:  Query parameters, from client-side.
        :param request:  Flask request; the uploaded file is read from
        `request.files["data_upload"]`
        :param User user:  User object of user who has submitted the query
        :return dict:  Safe query parameters
        :raises QueryParametersException:  If no file was uploaded, the file
        cannot be read, required columns are missing, or the timestamp of the
        first row does not have the required format
        """

        # do we have an uploaded file?
        if "data_upload" not in request.files:
            raise QueryParametersException("No file was offered for upload.")

        file = request.files["data_upload"]
        if not file:
            raise QueryParametersException("No file was offered for upload.")

        wrapped_upload = io.TextIOWrapper(file, encoding="utf-8")

        try:
            # validate file as tab
            if file.filename.endswith(".tab"):
                reader = csv.DictReader(wrapped_upload,
                                        delimiter="\t",
                                        quoting=csv.QUOTE_NONE)

            # validate file as csv
            else:
                reader = csv.DictReader(wrapped_upload)

            try:
                # reading the field names parses the header row; there are no
                # field names at all if the file is empty
                fields = reader.fieldnames or []
            except (UnicodeDecodeError, csv.Error):
                raise QueryParametersException(
                    "Uploaded file is not a well-formed CSV or TAB file.")

            # check if all required fields are present
            required = ("id", "thread_id", "subject", "author", "body",
                        "timestamp")
            missing = [field for field in required if field not in fields]

            if missing:
                raise QueryParametersException(
                    "The following required columns are not present in the csv file: %s"
                    % ", ".join(missing))

            # only the first data row (if any) is used to check the
            # timestamp format
            try:
                first_row = next(reader, None)
            except (UnicodeDecodeError, csv.Error):
                raise QueryParametersException(
                    "Uploaded file is not a well-formed CSV or TAB file.")

            if first_row is not None:
                try:
                    datetime.datetime.strptime(first_row["timestamp"],
                                               "%Y-%m-%d %H:%M:%S")
                except (ValueError, TypeError):
                    # TypeError covers a missing value (None) in a short row
                    raise QueryParametersException(
                        "Your 'timestamp' column does not have the required format (YYYY-MM-DD hh:mm:ss)"
                    )
        finally:
            # detach the text wrapper, so that discarding it does not close
            # the underlying upload - also when validation fails
            wrapped_upload.detach()

        # Whether to strip the HTML tags
        strip_html = bool(query.get("strip_html"))

        # return metadata - the filename is sanitised and serves no purpose at
        # this point in time, but can be used to uniquely identify a dataset
        disallowed_characters = re.compile(r"[^a-zA-Z0-9._+-]")
        return {
            "filename": disallowed_characters.sub("", file.filename),
            "time": time.time(),
            "datasource": "custom",
            "board": "upload",
            "strip_html": strip_html
        }