def sifting_date_str_stamp(date_str):
    """Convert a scraped date string into a 13-digit (millisecond) timestamp.

    The following shapes are tried in order:

    * ``2016年 4月11日 10:30`` - full date including the year
    * ``4月11日 10:30``        - no year; the current local year is used
    * ``今天09:17``            - time of day only; today's local date is used
    * ``35分钟前``             - any string starting with digits is read as
      "that many minutes before now"

    :param date_str: the raw date text to interpret
    :return: integer timestamp in milliseconds; the current time is returned
        when the text matches none of the shapes or parsing raises
    """
    try:
        if re.match(r'[0-9]+年\s\d{1,2}月', date_str):
            time_stamp = time.mktime(
                time.strptime(date_str, '%Y年 %m月%d日 %H:%M'))
            return int(time_stamp * 1000)
        elif re.match(r'\d{1,2}月', date_str):
            # No year in the text: prefix the current local year.
            year = format_time(time.localtime(), '%Y')
            full_date = year + "年 " + date_str
            time_stamp = time.mktime(
                time.strptime(full_date, '%Y年 %m月%d日 %H:%M'))
            return int(time_stamp * 1000)
        elif re.match(r'今天\d', date_str):
            # Drop the two-character "今天" prefix and keep the HH:MM part.
            today = format_time(time.localtime(), '%Y-%m-%d')
            full_date = today + " " + date_str[2:]
            time_stamp = time.mktime(
                time.strptime(full_date, '%Y-%m-%d %H:%M'))
            return int(time_stamp * 1000)
        else:
            minutes_match = re.match(r'\d+', date_str)
            if minutes_match:
                that_time = datetime.datetime.now() - datetime.timedelta(
                    minutes=int(minutes_match.group()))
                return int(that_time.timestamp() * 1000)
    except Exception as err:
        logger.error("--> sifting real timestamp error, {} , {} ".format(
            date_str, str(err)))
    # Default to the current time.
    return int(time.time() * 1000)
def get_one(tagPo):
    """Fetch a single row from ``lp_tag`` filtered by id and/or href.

    Only the truthy ``id`` / ``href`` attributes of ``tagPo`` are used as
    filters. When neither is set, no WHERE clause is added to the query.

    :param tagPo: object whose ``id`` and ``href`` attributes drive the filter
    :return: a new ``TagPO`` populated from the row, or ``False`` when no row
        is returned or the lookup raises
    """
    # Bound before the try block so the except/finally clauses can always
    # reference them, even when creating the pool itself fails.
    select_sql = "select id,`name`,href from lp_tag "
    mysql = None
    try:
        mysql = MyPymysqlPool(ENV.MYSQL_CONF_TYPE)
        conditions = []
        if tagPo.id:
            conditions.append("id=%d" % tagPo.id)
        if tagPo.href:
            # NOTE(review): href is interpolated into the SQL text as-is;
            # switch to a parameterized query if the pool helper supports it.
            conditions.append("href='%s'" % tagPo.href)
        if conditions:
            select_sql = select_sql + "where " + " and ".join(conditions)
        result = mysql.getOne(sql=select_sql)
        logger.info("--> select result " + str(result))
        if result:
            found = TagPO()
            found.__dict__.update(result)
            return found
    except Exception as err:
        logger.error("--> select tag error, sql :{}, err :{}".format(
            select_sql, str(err)))
    finally:
        if mysql is not None:
            mysql.dispose()
    return False
def get_one(newsPo):
    """Fetch a single row from ``lp_news`` filtered by id and/or href.

    Only the truthy ``id`` / ``href`` attributes of ``newsPo`` are used as
    filters. When neither is set, no WHERE clause is added to the query.

    :param newsPo: object whose ``id`` and ``href`` attributes drive the filter
    :return: a new ``NewsPO`` populated from the row, or ``False`` when no row
        is returned or the lookup raises
    """
    # Bound before the try block so the except/finally clauses can always
    # reference them, even when creating the pool itself fails.
    select_sql = "select id,module,title,synopsis,href,create_time,comment_count from lp_news "
    mysql = None
    try:
        mysql = MyPymysqlPool(ENV.MYSQL_CONF_TYPE)
        conditions = []
        if newsPo.id:
            conditions.append("id=%d" % newsPo.id)
        if newsPo.href:
            # NOTE(review): href is interpolated into the SQL text as-is;
            # switch to a parameterized query if the pool helper supports it.
            conditions.append("href='%s'" % newsPo.href)
        if conditions:
            select_sql = select_sql + "where " + " and ".join(conditions)
        result = mysql.getOne(sql=select_sql)
        logger.info("--> select result " + str(result))
        if result:
            logger.info("id :{}, module:{}, title :{}, ".format(
                result['id'], result['module'], result['title']))
            newsObj = NewsPO()
            newsObj.__dict__.update(result)
            return newsObj
    except Exception as err:
        logger.error("--> select news error, sql :{}, err :{}".format(
            select_sql, str(err)))
    finally:
        if mysql is not None:
            mysql.dispose()
    return False
def get_conn():
    """Open a database connection using the configured properties.

    The connection settings are read through ``get_property`` and the charset
    from ``ENV.MYSQL_CHARSET``. Arguments are passed by keyword because newer
    PyMySQL releases no longer accept them positionally.

    :return: the connection object, or ``None`` when connecting fails
        (the failure is logged)
    """
    try:
        return pymysql.connect(
            host=get_property('host'),
            user=get_property('user'),
            password=get_property('password'),
            database=get_property('database'),
            port=int(get_property('port')),
            charset=ENV.MYSQL_CHARSET)
    except Exception as e:
        logger.error("--> get db failed, " + str(e))
    return None
def insert(tagPo):
    """Insert one tag into ``lp_tag`` and store the new row id on ``tagPo``.

    :param tagPo: object providing ``name`` and ``href``; its ``id`` attribute
        is set from ``getInsertId()`` after a successful insert
    :return: whatever the pool's ``insert`` returns, or ``0`` when the insert
        raises (the error is logged)
    """
    mysql = MyPymysqlPool(ENV.MYSQL_CONF_TYPE)
    # NOTE(review): values are interpolated into the SQL text as-is, so a
    # quote in name/href breaks the statement; prefer a parameterized query
    # if the pool helper supports one.
    insert_sql = "insert into lp_tag(`name`, href) " \
                 "values('%s','%s')" % (str(tagPo.name), str(tagPo.href))
    try:
        result = mysql.insert(sql=insert_sql)
        tagPo.id = mysql.getInsertId()
        logger.info("--> insert tag result : " + str(result))
        return result
    except Exception as err:
        logger.error(
            "--> insert tag error, sql :{}, param :{}, \nerr :{}".format(
                insert_sql, class_to_dict(tagPo), str(err)))
    finally:
        mysql.dispose()
    return 0
def insert(newsTagPo):
    """Insert one news/tag link row into ``lp_news_tag``.

    On success the new row id from ``getInsertId()`` is stored on
    ``newsTagPo.id``.

    :param newsTagPo: object providing integer ``tag_id`` and ``news_id``
    :return: whatever the pool's ``insert`` returns, or ``0`` when the insert
        raises (the error is logged)
    """
    pool = MyPymysqlPool(ENV.MYSQL_CONF_TYPE)
    statement = (
        "insert into lp_news_tag( tag_id, news_id) "
        "values( %d, %d)"
    ) % (newsTagPo.tag_id, newsTagPo.news_id)
    try:
        outcome = pool.insert(sql=statement)
        newsTagPo.id = pool.getInsertId()
        logger.info("--> insert lp_news_tag result : " + str(outcome))
        return outcome
    except Exception as err:
        message = "--> insert lp_news_tag error, sql :{}, param :{}, \nerr :{}".format(
            statement, class_to_dict(newsTagPo), str(err))
        logger.error(message)
    finally:
        # Always hand the connection back, whether or not the insert worked.
        pool.dispose()
    return 0
def insert(newsPo):
    """Insert one news record into ``lp_news``.

    On success the new row id from ``getInsertId()`` is printed and stored on
    ``newsPo.id``.

    :param newsPo: object providing ``module``, ``title``, ``synopsis``,
        ``href``, ``create_time`` and ``comment_count``
    :return: whatever the pool's ``insert`` returns, or ``0`` when the insert
        raises (the error is logged)
    """
    pool = MyPymysqlPool(ENV.MYSQL_CONF_TYPE)
    values = (
        newsPo.module,
        str(newsPo.title),
        str(newsPo.synopsis),
        str(newsPo.href),
        newsPo.create_time,
        str(newsPo.comment_count),
    )
    statement = (
        "insert into lp_news(module, title, synopsis, href, create_time, comment_count) "
        "values(%d,'%s','%s','%s',%d,'%s')"
    ) % values
    try:
        outcome = pool.insert(sql=statement)
        print("--> insert id :{}".format(pool.getInsertId()))
        newsPo.id = pool.getInsertId()
        logger.info("--> insert news result : " + str(outcome))
        return outcome
    except Exception as err:
        message = "--> insert news error, sql :{}, param :{}, \nerr :{}".format(
            statement, class_to_dict(newsPo), str(err))
        logger.error(message)
    finally:
        # Always hand the connection back, whether or not the insert worked.
        pool.dispose()
    return 0
def load_content_file_to_db(index, date_dir="20180313"):
    """Load one scraped news JSON file and store its news, tags and links.

    The file ``<sina_bs_folder><date_dir>/<index>.txt`` is read and parsed as
    JSON. It is processed only when it is a dict with more than one entry.
    For each entry a news row is inserted unless a row with the same href
    already exists, then its tags are inserted and linked to the news row.

    Any exception aborts the whole load and is logged; nothing is raised.

    :param index: file name (without ``.txt``) inside the date folder
    :param date_dir: name of the date folder under ``sina_bs_folder``;
        defaults to the previously hard-coded ``"20180313"``
    :return: None
    """
    try:
        file_path = global_consts["sina_bs_folder"] + "{}{}{}.txt".format(
            date_dir, os.sep, index)
        with open(file_path, 'r') as file:
            ss = file.read()
        if not ss:
            return
        json_cont = json.loads(ss)
        # NOTE(review): a dict holding exactly one entry is skipped by this
        # check - kept as in the original; confirm that this is intended.
        if len(json_cont) <= 1 or not isinstance(json_cont, dict):
            return
        for v in json_cont.values():
            news_po = NewsPO(
                module=10000,
                title=v['title'],
                synopsis=v['synopsis'],
                href=v['href'],
                create_time=sifting_date_str_stamp(v['create_time']),
                comment_count=sifting_comment_count(v['comment']))
            logger.info(class_to_dict(news_po))

            # Only news that carries an href can be checked for duplicates.
            existing = None
            if news_po.href:
                existing = news_mysql.get_one(NewsPO(href=news_po.href))
            if existing:
                logger.warn("--> news has exist, href :{}".format(
                    existing.href))
            else:
                result = news_mysql.insert(news_po)
                logger.info("--> insert news result : " + str(result))

            tags = v['tags']
            if tags:
                _save_tags_for_news(_tags_from_raw(tags), news_po)
    except Exception as err:
        logger.error("--> load content file error : " + str(err))


def _tags_from_raw(tags):
    """Build TagPO objects from raw tag entries (iterables of name/url values)."""
    tag_objs = []
    for tk in tags:
        if not tk:
            continue
        tag_obj = TagPO()
        for tv in tk:
            # A value starting with "http" is the tag link, anything else
            # is taken as the tag name.
            if str(tv).startswith('http'):
                tag_obj.href = tv
            else:
                tag_obj.name = tv
        tag_objs.append(tag_obj)
    return tag_objs


def _save_tags_for_news(tag_objs, news_po):
    """Insert tags that are not stored yet and link them to ``news_po``."""
    for toj in tag_objs:
        if tags_mysql.get_one(toj):
            # NOTE(review): the row returned by get_one is discarded, so
            # toj.id presumably stays unset for an existing tag and no link
            # row is written for it - confirm whether that is intended.
            logger.warn("--> tag has exist : {}".format(toj.href))
        else:
            result = tags_mysql.insert(toj)
            logger.info("--> insert tag result : {}".format(result))
        if toj.id and news_po.id:
            news_tag = NewsTag(tag_id=toj.id, news_id=news_po.id)
            result = news_tag_mysql.insert(news_tag)
            logger.info("--> insert news_tag result :{}".format(result))