def __init__(self, conf):
    """Set up logging, the Redis queue connection and the classifier.

    Args:
        conf: Configuration object. This constructor reads its
            ``log_file``, ``redis_host``, ``redis_port`` and ``redis_db``
            attributes and keeps a reference to it on ``self.conf``.
    """
    self.conf = conf

    # The rotating file handler is pushed before the logger is created.
    # NOTE(review): push_application() presumably installs the handler
    # application-wide (Logbook-style API) -- confirm against the import.
    log_handler = TimedRotatingFileHandler(
        conf.log_file,
        date_format="%Y-%m-%d",
    )
    log_handler.push_application()
    self.logger = Logger("Firetower-server")

    # Connection parameters are gathered first, then passed as keywords.
    redis_settings = {
        "host": conf.redis_host,
        "port": conf.redis_port,
        "redis_db": conf.redis_db,
    }
    self.queue = redis_util.get_redis_conn(**redis_settings)

    self.classifier = classifier.Levenshtein()
    # Starts unset; nothing in this constructor assigns a real value.
    self.last_archive = None
def __init__(self, conf):
    """Wrap ``conf`` in ``config.Config`` and open the Redis queue.

    Args:
        conf: Value handed straight to ``config.Config``; its expected
            form is defined by that class, not by this constructor.

    The Redis host, port, database and queue key are copied from the
    resulting config object onto the instance before the connection is
    requested from ``redis_util.get_redis_conn``.
    """
    parsed = config.Config(conf)
    self.conf = parsed

    # Mirror the settings this object needs as plain instance attributes.
    self.redis_host = parsed.redis_host
    self.redis_port = parsed.redis_port
    self.redis_db = parsed.redis_db
    self.queue_key = parsed.queue_key

    connection_args = {
        "host": self.redis_host,
        "port": self.redis_port,
        "redis_db": self.redis_db,
    }
    self.queue = redis_util.get_redis_conn(**connection_args)
def __init__(self, sheet_name, settings=SETTINGS):
    """Create the database/Redis handles and default batch settings.

    Args:
        sheet_name: Stored unchanged on ``self.sheet_name``.
        settings: Passed to ``MySqlDBClass``; defaults to the
            module-level ``SETTINGS``.
    """
    # External handles, created in the same order as before:
    # MongoDB, MySQL, Redis, then the logger.
    mongo_handle = MongodbClass()
    mysql_handle = MySqlDBClass(settings)
    self.mongo_db = mongo_handle
    self.mysql_conn = mysql_handle

    self.sheet_name = sheet_name
    self.primary_keys = []

    # NOTE(review): presumably the number of records handled per batch
    # for each backend -- confirm against the methods that read them.
    self.mongo_batch_size, self.mysql_batch_size = 400, 200

    self.redis_conn = get_redis_conn()
    self.def_logger = update_logging()

    # Hard-coded Windows location; kept exactly as the original literal.
    self.file_path = 'C:\\Program Files (x86)\\crawling_server\\wangban_utils'
def __init__(self, conf):
    """Set up logging, the Redis queue connection and the classifier.

    Args:
        conf: Configuration parameters (loaded from YAML according to
            the original note). They are read by attribute here:
            ``log_file``, ``redis_host``, ``redis_port``, ``redis_db``.
    """
    self.conf = conf

    # Install the date-stamped rotating file handler first, then create
    # the named logger.
    # NOTE(review): push_application() looks like the Logbook handler
    # API -- confirm against the import.
    file_handler = TimedRotatingFileHandler(
        conf.log_file, date_format='%Y-%m-%d')
    file_handler.push_application()
    self.logger = Logger('Firetower-admin')

    host = conf.redis_host
    port = conf.redis_port
    db_index = conf.redis_db
    self.queue = redis_util.get_redis_conn(
        host=host, port=port, redis_db=db_index)

    self.classifier = classifier.Levenshtein()
def __init__(self, conf):
    """Set up logging, the Redis queue connection and the classifiers.

    Args:
        conf: Configuration object. This constructor reads its
            ``log_file``, ``redis_host``, ``redis_port``, ``redis_db``
            and ``class_order`` attributes.

    ``conf.class_order`` is iterated in order; each entry is looked up
    by name on ``classifier`` and called with no arguments, and the
    results are collected in ``self.classifiers``.
    """
    self.conf = conf

    # The handler is pushed before the logger is created.
    # NOTE(review): push_application() looks like the Logbook handler
    # API -- confirm against the import.
    rotating_handler = TimedRotatingFileHandler(
        conf.log_file,
        date_format='%Y-%m-%d',
    )
    rotating_handler.push_application()
    self.logger = Logger('Firetower-server')

    self.queue = redis_util.get_redis_conn(
        host=conf.redis_host,
        port=conf.redis_port,
        redis_db=conf.redis_db,
    )

    # One instance per configured name, in the configured order.
    self.classifiers = [
        getattr(classifier, name)() for name in conf.class_order
    ]
    # Starts unset; nothing in this constructor assigns a real value.
    self.last_archive = None
#print(detect_num) return detect_num def delete_from_db(self, _id, sheet_name): try: self.mongo_db[sheet_name].remove({'_id': _id}) print('delete successfully') except Exception as e: print('delete from mongo error', e) if __name__ == '__main__': from redis_util import get_redis_conn import json mongo_instance = MongodbClass() redis_conn = get_redis_conn() #mongo_instance.get_from_db('zhuji',return_field='_id') #select_conditions = {'an_major':'其他交易'} ##select_conditions = {'an_major':'工程建设','an_type':"招标公告"} ##mongo_instance.detect_from_db('beilun',select_conditions) cleaner = Cleaner(page_structure=False, links=False, style=True, scripts=True) for data in mongo_instance.get_all_from_db('linan_clean'): print(data['ID']) #if not data['PUBDATE'].startswith('2019'): # continue input_value = {} input_value[data['LINK']] = {}
def __init__(self, sheet_name):
    """Create the MongoDB and Redis handles and remember the sheet name.

    Args:
        sheet_name: Stored unchanged on ``self.sheet_name``.
    """
    # Handles are created in the same order as before: MongoDB, then Redis.
    mongo_handle = MongodbClass()
    redis_handle = get_redis_conn()

    self.mongo_conn = mongo_handle
    self.redis_conn = redis_handle
    self.sheet_name = sheet_name