def update_with_dic(dic):
    """Update the row whose id equals ``dic['id']``.

    The SET clause is produced by ``ScrappdeDataDao.part_dic_to_update_sql``
    from the same dictionary.

    NOTE(review): ``dic['id']`` is spliced into the SQL text unescaped;
    confirm that callers only pass trusted ids.
    """
    helper = MysqlHelper(Configs())
    set_clause = ScrappdeDataDao.part_dic_to_update_sql(dic)
    statement = "".join([
        "update ",
        tableName,
        " set ",
        set_clause,
        " where id = '",
        dic['id'],
        "'",
    ])
    helper.update(statement)
def set_elog(id: str, elog) -> None:
    """Store ``str(elog)`` in the elog column of a scraping target.

    After the update, the target's last-failed time is refreshed through
    ``set_last_failed_time``.

    Args:
        id: Primary key of the scraping-target row.
        elog: Error information; it is converted with ``str()`` before
            being bound to the statement.
    """
    mysql = MysqlHelper(Configs())
    sql = (
        "update " + TBScrapingTarget.TableName
        + " set " + TBScrapingTarget.elog.key
        + " = %s where id = %s "
    )
    mysql.execute_sql_with_par(sql, (str(elog), id))
    set_last_failed_time(id)
def save_data(self, url):
    """Insert a picture record that points at *url* and return its new id.

    A fresh UUID is used as the row id and a second UUID (plus ``.jpg``)
    as both ``filename`` and ``origin_filename``. ``min_path`` and
    ``normal_path`` are both set to *url*, and ``valid`` is set to 1.

    Every value is bound as a statement parameter instead of being quoted
    into the SQL text by hand. The statement was already executed with a
    bound parameter (the id), so a ``%`` or ``'`` inside *url* could break
    or alter the query.

    Args:
        url: Location stored in both path columns.

    Returns:
        The generated id of the inserted row.
    """
    record_id = genUUID()
    name = genUUID() + ".jpg"
    fields = {
        'filename': name,
        'min_path': url,
        'normal_path': url,
        'origin_filename': name,
        'valid': 1,
    }
    columns = list(fields)
    assignments = "".join("," + column + "=%s" for column in columns)

    mysql = MysqlHelper(Configs())
    sql = "insert into " + tableName + " set id=%s" + assignments
    # Values are stringified as before so the stored representation stays
    # the same; only the escaping is now left to the helper.
    # NOTE(review): assumes MysqlHelper.save accepts a parameter list, as
    # other callers in this code base pass one - confirm.
    params = [record_id] + [str(fields[column]) for column in columns]
    mysql.save(sql, params)
    return record_id
def set_last_access_date(id: str):
    """Set ``last_access_date`` of a ``scraping_target`` row to the current date.

    Args:
        id: Primary key of the row to update.
    """
    timestamp = DateGo.get_current_date()
    helper = MysqlHelper(Configs())
    helper.execute_sql_with_par(
        "update scraping_target set last_access_date = %s where id = %s ",
        (timestamp, id),
    )
def set_last_failed_time(id: str):
    """Set the last-failed-time column of a scraping target to the current date.

    Table and column names are taken from ``TBScrapingTarget``.

    Args:
        id: Primary key of the row to update.
    """
    timestamp = DateGo.get_current_date()
    helper = MysqlHelper(Configs())
    statement = "".join([
        "update ",
        TBScrapingTarget.TableName,
        " set ",
        TBScrapingTarget.last_failed_time.key,
        " = %s where id = %s ",
    ])
    helper.execute_sql_with_par(statement, (timestamp, id))
def set_last_access_date(self, id):
    """Set the last-access-date column of row *id* to the current date.

    The table name comes from ``self.table_name`` and the column names
    from ``self.TB``.
    """
    timestamp = DateGo.get_current_date()
    helper = MysqlHelper(Configs())
    statement = "".join([
        "update ",
        self.table_name,
        " set ",
        self.TB.last_access_date.key,
        " = %s where ",
        self.TB.id.key,
        " = %s ",
    ])
    helper.execute_sql_with_par(statement, (timestamp, id))
def base_init():
    """Initialise the shared components, each with a fresh ``Configs()``.

    The components are called in a fixed order, starting with ``LogGo``.
    ``Download`` is called directly rather than through an ``init``
    method, and its result is discarded. ``BaseStrategy.init`` runs last
    and takes no configuration.
    """
    configured_in_order = (
        LogGo.init,
        RequestHelper.init,
        SMTPServer.init,
        Download,
        RequestHelperClassVer.init,
        ProxyHelper.init,
        MysqlHelper.init,
    )
    for configure in configured_in_order:
        configure(Configs())
    BaseStrategy.init()
def get_max_order_code():
    """Return the highest ``order_code`` among scrapped rows of ``news``.

    Returns:
        The top ``order_code`` converted with ``int()``, or 0 when the
        query yields no row.
    """
    mysql = MysqlHelper(Configs())
    sql = (
        "select `order_code` from news where is_scrapped = 1 "
        "order by `order_code` desc limit 0,1"
    )
    rows = mysql.load(sql)
    if not rows:
        return 0
    return int(rows[0]['order_code'])
def get_all_title():
    """Return the titles of all scrapped rows in ``news``.

    When the query yields no titles, the returned list holds the single
    placeholder string ``'QWERTYUIOP'`` instead of being empty.
    """
    helper = MysqlHelper(Configs())
    rows = helper.load("select `title` from news where is_scrapped = 1")
    titles = [row['title'] for row in rows]
    if len(titles) < 1:
        titles.append('QWERTYUIOP')
    return titles
def get_frequency(self, id):
    """Return the frequency stored for row *id*, or 0 as a fallback.

    0 is returned when no row matches or when the stored value is not an
    ``int`` (``bool`` values also pass, being a subclass of ``int``).

    NOTE(review): *id* is concatenated into the SQL text unescaped;
    confirm that callers only pass trusted ids.
    """
    helper = MysqlHelper(Configs())
    column = self.TB.frequency.key
    query = "".join([
        "select ",
        column,
        " from ",
        self.table_name,
        " where ",
        self.TB.id.key,
        " = '",
        id,
        "' limit 0,1",
    ])
    rows = helper.load(query)
    if len(rows) == 0:
        return 0
    value = rows[0][column]
    return value if isinstance(value, (int, bool)) else 0
def get_frequency(id):
    """Return the ``frequency`` of a ``scraping_target`` row, or 0 as a fallback.

    0 is returned when no row matches or when the stored value is not an
    ``int`` (``bool`` values also pass, being a subclass of ``int``).

    NOTE(review): *id* is concatenated into the SQL text unescaped;
    confirm that callers only pass trusted ids.
    """
    helper = MysqlHelper(Configs())
    query = "".join([
        "select frequency from scraping_target where id = '",
        id,
        "' limit 0,1",
    ])
    rows = helper.load(query)
    if len(rows) == 0:
        return 0
    value = rows[0]['frequency']
    return value if isinstance(value, (int, bool)) else 0
def get_target_list(self):
    """Load all valid targets as ``Target`` objects.

    Rows are selected in descending order-code order and returned in the
    reverse of that order. Each column of a row is copied into the
    ``__dict__`` of a new ``Target``.

    Returns:
        A list of ``Target`` instances, empty when no row matches.
    """
    mysql = MysqlHelper(Configs())
    sql = (
        "select * from " + self.table_name
        + " where " + self.TB.valid.key
        + " = 1 order by " + self.TB.order_code.key + " desc"
    )
    targets = []
    for row in mysql.load(sql):
        target = Target()
        for key in row:
            target.__dict__[key] = row[key]
        targets.append(target)
    # One reversal gives the same ordering as inserting every element at
    # index 0, without the quadratic cost.
    targets.reverse()
    return targets
def get_all_value_for_key(key):
    """Return every non-null value of column *key* from scrapped ``news`` rows.

    The column name is backtick-quoted in both the select list and the
    WHERE clause. It used to be quoted only in the select list, so a name
    that needs quoting failed in the filter.

    Args:
        key: Column name; converted with ``str()``.

    Returns:
        The list of values. When the query yields nothing, the list holds
        the single placeholder string ``'QWERTYUIOP'``.
    """
    column = str(key)
    mysql = MysqlHelper(Configs())
    sql = (
        "select `" + column + "` from news where is_scrapped = 1 and `"
        + column + "` is not null"
    )
    values = [row[column] for row in mysql.load(sql)]
    if not values:
        values.append('QWERTYUIOP')
    return values
def get_target_list():
    """Load all valid ``scraping_target`` rows as ``Target`` objects.

    Rows are selected by descending ``order_code`` and returned in the
    reverse of that order. Each column of a row is copied into the
    ``__dict__`` of a new ``Target``.

    Returns:
        A list of ``Target`` instances, empty when no row matches.
    """
    mysql = MysqlHelper(Configs())
    # An earlier variant of this query also filtered on
    # TBScrapingTarget.type.key = 'newrank'; that filter is not applied.
    sql = "select * from scraping_target where valid = 1  order by order_code desc"
    targets = []
    for row in mysql.load(sql):
        target = Target()
        for key in row:
            target.__dict__[key] = row[key]
        targets.append(target)
    # One reversal gives the same ordering as inserting every element at
    # index 0, without the quadratic cost.
    targets.reverse()
    return targets
def save_data_insert(dic):
    """Insert *dic* as a new ``news`` row, then register its id in the article table.

    A fresh UUID is used as the row id. Each key of *dic* becomes a
    backtick-quoted column and each value is bound through a ``%s``
    placeholder. A second insert stores the same id in
    ``article_table_name``.

    The earlier per-value type checks compared ``type(value)`` against
    ``type(str)`` / ``type(int)`` (both are ``type``), so its ``%i``
    branch was unreachable and every placeholder was ``%s`` anyway. This
    version states that directly.

    Args:
        dic: Mapping of column name to value for the ``news`` row.
    """
    record_id = genUUID()
    mysql = MysqlHelper(Configs())

    columns = ["id"] + list(dic)
    column_sql = ", ".join("`" + column + "`" for column in columns)
    placeholder_sql = ",".join(["%s"] * len(columns))
    params = [record_id] + [dic[column] for column in columns[1:]]

    mysql.save(
        "INSERT INTO news(" + column_sql + ") VALUES (" + placeholder_sql + ")",
        params,
    )

    sql = "insert into " + article_table_name + " set id=%s"
    mysql.save(sql, record_id)
def get_last_access_date(self, id):
    """Return the stored last-access date of row *id*.

    Returns:
        The stored value when it is a ``datetime.datetime`` (a ``bool``
        is also passed through). Otherwise, or when no row matches, the
        result of ``DateGo.get_current_date_raw()``.

    NOTE(review): *id* is concatenated into the SQL text unescaped;
    confirm that callers only pass trusted ids.
    """
    mysql = MysqlHelper(Configs())
    column = self.TB.last_access_date.key
    sql = (
        "select " + column
        + " from " + self.table_name
        + " where " + self.TB.id.key
        + " = '" + id + "' limit 0,1"
    )
    rows = mysql.load(sql)
    if rows:
        stamp = rows[0][column]
        if isinstance(stamp, (datetime.datetime, bool)):
            return stamp
    return DateGo.get_current_date_raw()
def par_to_get_dic(select_list, and_dic, order_key=None, limit_tup=None):
    """Query scrapped rows of ``tableName`` and return them as a list.

    Args:
        select_list: Columns to select; rendered by
            ``ScrappdeDataDao.list_to_sql_part``.
        and_dic: Extra filter conditions; rendered by
            ``ScrappdeDataDao.dic_to_sql_part`` and AND-ed to
            ``is_scrapped = 1``.
        order_key: Optional pair whose first item is the order-by
            expression and whose second is the direction.
        limit_tup: Optional pair ``(offset, count)`` for the LIMIT clause.

    Returns:
        A list holding every row produced by the query.

    NOTE(review): all fragments are concatenated into the SQL text;
    confirm that callers only pass trusted values.
    """
    select_sql = ScrappdeDataDao.list_to_sql_part(select_list)
    and_sql = ScrappdeDataDao.dic_to_sql_part(and_dic)

    order_sql = ''
    if order_key is not None:
        order_sql = ' order by ' + order_key[0] + ' ' + order_key[1]

    limit_sql = ''
    if limit_tup is not None:
        limit_sql = ' limit ' + str(limit_tup[0]) + ',' + str(limit_tup[1])

    mysql = MysqlHelper(Configs())
    sql = "select " + select_sql + " from " + tableName + " where is_scrapped = 1"
    # Skip the connective when there is no extra filter, so the statement
    # never ends in a dangling "and". (Presumably the helper yields an
    # empty fragment for an empty and_dic - TODO confirm.)
    if and_sql:
        sql += " and " + and_sql
    sql += " " + order_sql + limit_sql

    return list(mysql.load(sql))
def get_last_access_date(id):
    """Return the stored ``last_access_date`` of a ``scraping_target`` row.

    Returns:
        The stored value when it is a ``datetime.datetime`` (a ``bool``
        is also passed through). Otherwise, or when no row matches, the
        result of ``DateGo.get_current_date_raw()``.

    NOTE(review): *id* is concatenated into the SQL text unescaped;
    confirm that callers only pass trusted ids.
    """
    mysql = MysqlHelper(Configs())
    sql = (
        "select last_access_date from scraping_target where id = '"
        + id + "' limit 0,1"
    )
    rows = mysql.load(sql)
    if rows:
        stamp = rows[0]['last_access_date']
        if isinstance(stamp, (datetime.datetime, bool)):
            return stamp
    return DateGo.get_current_date_raw()
def loops(self,target,exists,result):
    """Page through a target's index and collect entries not seen before.

    Up to ``Configs().length_weibo`` pages are requested through
    ``self.build_and_request``, using ``target.extra0`` as the base URL.
    Paging stops when a page comes back empty. The whole scan ends as
    soon as an entry whose id is already in *exists* is met.

    Args:
        target: Object whose ``extra0`` attribute holds the base URL.
        exists: Collection of known ids, queried with ``count``.
        result: List that receives the new entries (mutated in place).

    Any exception is printed with its traceback and logged as a warning;
    it is not re-raised.
    """
    try:
        base_url = target.extra0
        for page in range(0, Configs().length_weibo):
            print("page: " + str(page))
            entries = self.build_and_request(
                WeiboRuler.keys, base_url, WeiboRuler.request_getindex, page
            )
            if len(entries) == 0:
                break
            for entry in entries:
                # Duplicate check for routine scraping: stop at the first
                # entry that is already known.
                if exists.count(entry['id']) >= 1:
                    return
                result.append(entry)
    except Exception as e:
        import traceback
        print(traceback.format_exc())
        LogGo.warning(repr(e))
        LogGo.warning("Scan Failed!")
def save_data(dic):
    """Insert *dic* as a new ``news`` row, then register its id in the article table.

    A fresh UUID is used as the row id and each key of *dic* becomes a
    column assignment. A second insert stores the same id in
    ``article_table_name``.

    Values are bound as statement parameters instead of being quoted into
    the SQL text by hand. The statement was already executed with a bound
    parameter (the id), so a ``%`` or ``'`` inside any value could break
    or alter the query. An empty *dic* no longer leaves a trailing comma.

    Args:
        dic: Mapping of column name to value for the ``news`` row.
    """
    record_id = genUUID()
    columns = list(dic)
    assignments = "".join("," + column + "=%s" for column in columns)

    mysql = MysqlHelper(Configs())
    # Values are stringified as before so the stored representation stays
    # the same; only the escaping is now left to the helper.
    # NOTE(review): assumes MysqlHelper.save accepts a parameter list, as
    # other callers in this code base pass one - confirm.
    params = [record_id] + [str(dic[column]) for column in columns]
    mysql.save("insert into news set id=%s" + assignments, params)

    sql = "insert into " + article_table_name + " set id=%s"
    mysql.save(sql, record_id)
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBSpecialTarget`` table.

    The table description is stored on ``self.table`` and its
    ``TableName`` on ``self.table_name``.
    """
    GeneralDao.__init__(self, Configs())
    schema = TBSpecialTarget()
    self.table = schema
    self.table_name = schema.TableName
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBGlobalTarget`` table.

    The table description is stored on ``self.TB`` and its ``TableName``
    on ``self.table_name``.
    """
    GeneralDao.__init__(self, Configs())
    schema = TBGlobalTarget()
    self.TB = schema
    self.table_name = schema.TableName
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBDictionaryType`` table.

    The table description is stored on ``self.TB`` and its ``TableName``
    on ``self.table_name``.
    """
    GeneralDao.__init__(self, Configs())
    schema = TBDictionaryType()
    self.TB = schema
    self.table_name = schema.TableName
def reset_target_valid(target):
    """Set ``valid`` to 0 on the ``scraping_target`` row with id ``target.id``.

    NOTE(review): ``target.id`` is concatenated into the SQL text
    unescaped; confirm that it always comes from a trusted source.
    """
    helper = MysqlHelper(Configs())
    statement = "".join((
        "update scraping_target set valid = 0 where id = '",
        target.id,
        "'",
    ))
    helper.update(statement)
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBMR`` table.

    The table description is stored on ``self.TB`` and its ``TableName``
    on ``self.table_name``.
    """
    GeneralDao.__init__(self, Configs())
    schema = TBMR()
    self.TB = schema
    self.table_name = schema.TableName
"requests==2.12.4\r\n" "shellescape==3.4.1\r\n" "six==1.10.0\r\n" "stem==1.6.0\r\n" "tzlocal==1.3\r\n" "Werkzeug==0.11.15\r\n") print(" ") print("------ ---- --- ---getIndexNumber--- ------ ---- --") print(" ") scan = Scanner() while True: LogGo.info(">>> New Loop -->") try: scan.start() except Exception as e: import traceback msg = traceback.format_exc() LogGo.error(msg) if Configs.infinity: while not somebody_help.isthataworkday(Configs().work_sequence): time.sleep(60 * 60) else: break LogGo.info("-- GH Offline --") print("---------------------------------------------")
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBWenzhangInfo`` table.

    The table description is stored on ``self.TB`` and its ``TableName``
    on ``self.table_name``.
    """
    GeneralDao.__init__(self, Configs())
    schema = TBWenzhangInfo()
    self.TB = schema
    self.table_name = schema.TableName
def reset_target_valid(self, target):
    """Set the valid column to 0 on the row whose id equals ``target.id``.

    Table and column names come from ``self.table_name`` and ``self.TB``.

    NOTE(review): ``target.id`` is concatenated into the SQL text
    unescaped; confirm that it always comes from a trusted source.
    """
    helper = MysqlHelper(Configs())
    statement = "".join([
        "update ",
        self.table_name,
        " set ",
        self.TB.valid.key,
        " = 0 where ",
        self.TB.id.key,
        " = '",
        target.id,
        "'",
    ])
    helper.update(statement)
def __init__(self):
    """Run the ``GeneralDao`` initialiser, then bind the ``TBSoapBlackList`` table.

    ``self.TB`` now holds an instance of the table description, as the
    sibling DAO constructors do. It used to hold the class object itself
    because the call parentheses were missing.

    NOTE(review): assumes ``TBSoapBlackList`` can be constructed without
    arguments like the other ``TB*`` classes - confirm.
    """
    GeneralDao.__init__(self, Configs())
    self.TB = TBSoapBlackList()
    self.table_name = self.TB.TableName
def __init__(self):
    """Bind the ``TBProgram`` table, then run the ``GeneralDao`` initialiser.

    ``self.TB`` and ``self.table_name`` are assigned before the base
    initialiser is called.
    """
    schema = TBProgram()
    self.TB = schema
    self.table_name = schema.TableName
    GeneralDao.__init__(self, Configs())