def __init__(self):
        """
        Load the request types to block and to allow from env. variables.

        Both lists are obtained through ``self.load_types`` using the keys
        "BLOCK_TYPES" and "ALLOW_TYPES". A type that is listed as blocked
        and as allowed at the same time makes the configuration
        contradictory: an error is logged and initialization is aborted.

        Raises:
            SystemExit: with exit status 1 if at least one type appears in
                both lists.
        """
        # Local import: the builtin ``exit()`` is injected by the ``site``
        # module for interactive use and is not guaranteed to exist.
        import sys

        self.block_types = self.load_types("BLOCK_TYPES")
        self.allow_types = self.load_types("ALLOW_TYPES")

        # A non-empty intersection means some type is both blocked and allowed.
        conflicting_types = set(self.block_types) & set(self.allow_types)
        if conflicting_types:
            Logging.log(
                "Some types of requests are listed as allowed and also as blocked - choose one!",
                Logging.LEVEL_ERROR)
            # Non-zero status so supervisors can tell this was a failure.
            sys.exit(1)
コード例 #2
0
	def __init__(self):
		"""
			Initializes the object, especially loads the models.
			Also reads the settings from the env. variables.

			If the models directory or its "index.json" is missing, the dummy
			model is copied from /dummy-model/ and a warning is logged.
			The index must contain the entries 'lda', 'name', 'nn-crawl',
			'nn-attack' and 'nn-types'; otherwise an error is logged and the
			process exits with status 1 (raises SystemExit).

			Env. variables read here:
			BLOCK_CRAWLING: "true" loads the 'nn-crawl' network, any other
			value (or unset) loads the 'nn-attack' network.
			APPROACH_CONNECTOR: "and" selects the "and" connector, any other
			value (or unset) selects "or".
			APPROACH_USE: "lda" or "nn" selects that single model, any other
			value (or unset) selects "lda,nn".
		"""
		# Local import: the builtin exit() comes from the site module and is
		# meant for interactive use only.
		import sys

		index_path = RequestChecker.MODELS_DIR + "index.json"

		# check models directory and index
		if not (os.path.isdir(RequestChecker.MODELS_DIR) and os.path.isfile(index_path)):
			Logging.log("Using dummy model!", Logging.LEVEL_WARN)
			os.system("cp -R /dummy-model/ " + RequestChecker.MODELS_DIR)

		# check index file of model (the context manager closes the file handle)
		with open(index_path, 'r') as index_file:
			self.models = json.load(index_file)
		required_keys = ('lda', 'name', 'nn-crawl', 'nn-attack', 'nn-types')
		if any(key not in self.models for key in required_keys):
			Logging.log("Invalid Model Index!", Logging.LEVEL_ERROR)
			# non-zero status: an unusable model index is a failure
			sys.exit(1)

		Logging.log("Found Model " + self.models['name'])

		# load default 2 class models
		self.lda = LDAPredictor(self.models['lda'], RequestChecker.MODELS_DIR)
		nn_key = 'nn-crawl' if os.environ.get("BLOCK_CRAWLING") == "true" else 'nn-attack'
		self.nn = NNPredictor(self.models[nn_key], RequestChecker.MODELS_DIR)

		# load models to get type (self.nn_types is only set when the handler is active)
		self.type_handling = TypeHandler()
		if self.type_handling.is_active():
			Logging.log("TypeHandler active")
			self.nn_types = NNPredictor(self.models['nn-types'], RequestChecker.MODELS_DIR)

		# connector to use for models
		# 	or => one model classifies as safe
		#	and => both models classify as safe
		# an unset variable yields None from .get(), which falls back to "or"
		self.connector = "and" if os.environ.get("APPROACH_CONNECTOR") == "and" else "or"
		Logging.log('Using connector "' + self.connector + '"')

		# model to use; unset or unknown values fall back to both models
		requested_model = os.environ.get("APPROACH_USE")
		self.use_model = requested_model if requested_model in ('lda', 'nn') else "lda,nn"
		Logging.log('Using model(s) "' + self.use_model + '"')

		# create a notification (=mail) object (self.notifications is only set when active)
		if Notifications.is_active():
			self.notifications = Notifications()
コード例 #3
0
ファイル: cl.py プロジェクト: zoidzay/Craigslist-Hawk
def main():
    """Run one polling cycle: pull feeds, then e-mail or store the listings.

    Steps, in order:
      1. Create the global ``Log`` (log file named after the current
         timestamp) and load ``data_config.json`` / ``email_config.json``.
         Logging is started only if ``logging_enabled`` is non-zero.
      2. Re-create the posts saved in ``stored_posts.json`` (if the file is
         not empty) as ``CL_Post`` objects.
      3. Start one thread per (location, category) pair, each running
         ``PullFeeds``; wait for all of them, then hand every result to
         ``ParseFeed`` together with the listings collected so far.
      4. If there are listings: send them by e-mail and empty the stored
         posts file when ``CheckNotityInterval`` allows a notification,
         otherwise write them to the stored posts file.
      5. Update the notification intervals and write the data config back.
    """
    data_config_file = 'data_config.json'
    email_config_file = 'email_config.json'
    stored_posts_file = 'stored_posts.json'
    log_file = datetime.now().strftime('%Y-%m-%dT%H:%M:%S%z') + '.log'

    # Log is module-global on purpose: it is assigned here for use outside main().
    global Log
    Log = Logging(log_file)

    data_config = LoadJson(data_config_file)
    email_config = LoadJson(email_config_file)

    if int(data_config['logging_enabled']):
        Log.start()

    cl_listings = []

    if not IsEmpty(stored_posts_file):
        stored_posts = LoadJson(stored_posts_file)
        cl_listings.extend(CL_Post(stored_post) for stored_post in stored_posts)
        Log.log('Imported ' + str(len(cl_listings)) + ' saved posts')

    # Default timeout (in seconds) for sockets created from here on.
    socket.setdefaulttimeout(10)

    # One job per (location, category) combination, in nested-loop order.
    jobs = [
        (location, category)
        for location in data_config['locations']
        for category in data_config['categories']
    ]

    # Each worker receives the shared results list and its own slot index.
    results = [None] * len(jobs)
    threads = []
    for index, (location, category) in enumerate(jobs):
        worker = Thread(target=PullFeeds, args=(location, category, results, index))
        worker.start()
        threads.append(worker)

    for worker in threads:
        worker.join()

    # NOTE(review): ParseFeed presumably adds new posts to cl_listings - confirm.
    for feed in results:
        ParseFeed(feed, data_config, cl_listings)

    if cl_listings:
        if CheckNotityInterval(data_config['notification_intervals']):
            email = CL_Email(email_config)
            email.write(cl_listings)
            email.send()
            Log.log('Email sent to ' + str(email.recipient))

            # The listings went out by e-mail, so the stored copy is cleared.
            if not IsEmpty(stored_posts_file):
                MakeEmpty(stored_posts_file)
                Log.log('Emptied contents of ' + str(stored_posts_file))
        else:
            # No notification this time: keep the listings for a later run.
            Log.log('Storing posts to ' + str(stored_posts_file))
            WriteJson(stored_posts_file, cl_listings)
            Log.log('Successful write to ' + str(stored_posts_file))
    else:
        Log.log('No new posts detected')

    data_config['notification_intervals'] = UpdateIntervals(data_config['notification_intervals'])
    WriteJson(data_config_file, data_config)
    Log.log('Updated contents of ' + str(data_config_file))