コード例 #1
0
ファイル: sina_finance.py プロジェクト: cbbing/wealth_spider
def run_unfinish_content_is_null():
    """
    Re-fetch the content of articles whose stored content is empty.

    Selects up to 10 rows whose ``content`` has zero length from the table
    named by ``mysql_table_sina_finance``, downloads each article again via
    ``_latest_content_by_beautifulsoup`` and, when that returns a truthy
    value, writes it back to the row matching the same ``classify`` and
    ``url``.

    The scraped text is sent to the database as a bound parameter rather than
    being formatted into the SQL string, so quotes or backslashes inside an
    article can neither break the statement nor inject SQL.

    :return: None
    """
    select_sql = 'select classify, url from {_table} where length(content)=0 limit 10'.format(
        _table=mysql_table_sina_finance)
    df = pd.read_sql(select_sql, engine)

    # Only the table name is interpolated (identifiers cannot be bound).
    # The %s markers are DBAPI placeholders filled in by the driver.
    # NOTE(review): assumes the MySQL driver behind `engine` uses the
    # format/pyformat paramstyle (MySQLdb and PyMySQL do) - confirm.
    update_sql = 'update {0} set content=%s where classify=%s and url=%s'.format(
        mysql_table_sina_finance)

    for _, row in df.iterrows():
        content = _latest_content_by_beautifulsoup(row['url'])
        if not content:
            # Nothing could be fetched this time; leave the row untouched.
            continue

        # Log the statement and the target row, not the full article body.
        print(update_sql)
        print(row['url'])
        engine.execute(update_sql, (content, row['classify'], row['url']))
コード例 #2
0
ファイル: sina_licaishi.py プロジェクト: cbbing/wealth_spider
    def to_mysql(self):
        # Store this record as a single row of the table named by
        # mysql_table_licaishi_viewpoint, after first deleting any row that has
        # the same user_id, title and publish_time. Returns True once the
        # append has run.
        # NOTE(review): the handler that closes the outer `try` is not part of
        # this excerpt, so the failure path cannot be described from here.

        try:

            # One-row frame: each attribute is wrapped in a one-element list,
            # and `columns` pins the column order.
            df = DataFrame(
                {
                    "user_id": [self.user_id],
                    "user_name": [self.user_name],
                    "title": [self.title],
                    "detail": [self.detail],
                    "publish_time": [self.publish_time],
                    "href": [self.href],
                    "watch_count": [self.watch_count],
                    "repost_count": [self.repost_count],
                    "donate_count": [self.donate_count],
                    "comment_count": [self.comment_count],
                    # The three fields below are commented out and are not written.
                    #'is_top':[self.is_top],
                    #'is_repost':[self.is_repost],
                    #'repost_reason':[self.repost_reason
                    "device": [self.device],  # ]
                },
                columns=[
                    "user_id",
                    "user_name",
                    "title",
                    "detail",
                    "publish_time",
                    "href",
                    "watch_count",
                    "repost_count",
                    "donate_count",
                    "comment_count",
                    "device",
                ],
            )
            # Debug output of the row about to be stored.
            print df

            # Remove a previously stored row with the same key before appending
            # (presumably to avoid duplicates on re-crawl). A failed delete is
            # only printed; the append below still runs.
            # NOTE(review): the values are formatted straight into the SQL
            # text, so a single quote in `title` ends the string literal early;
            # bound parameters would avoid that.
            try:
                sql_del = "delete from {table} where user_id='{user_id}' and title='{title}' and publish_time='{publish_time}'".format(
                    table=mysql_table_licaishi_viewpoint,
                    user_id=self.user_id,
                    title=self.title,
                    publish_time=self.publish_time,
                )
                engine.execute(sql_del)
            except Exception, e:
                print "delete error! ", str(e)

            # Append the row to the table; index=False leaves the DataFrame
            # index out of the written columns.
            df.to_sql(mysql_table_licaishi_viewpoint, engine, if_exists="append", index=False)
            return True
コード例 #3
0
ファイル: weixin.py プロジェクト: cbbing/wealth_spider
    def to_mysql(self):

        try:

            df = DataFrame({'user_id':[self.user_id],
                            'user_name':[self.user_name],
                            'title':[self.title],
                            'detail': [self.detail],
                            'publish_time':[self.publish_time],
                            'capture_time':[self.capture_time],
                            'device':[self.device],
                            'href':[self.href],
                            # 'repost_count':[self.repost_count],
                            # 'donate_count':[self.donate_count],
                            # 'comment_count':[self.comment_count],
                            # 'is_top':[self.is_top],
                            # 'is_repost':[self.is_repost],
                            # 'repost_reason':[self.repost_reason]

                            },
                           columns=['user_id', 'user_name', 'title', 'detail',
                                    'publish_time', 'capture_time', 'device','href'])
            print df

            try:
                sql_del = "delete from {table} where user_id='{user_id}' and href='{href}' and publish_time='{publish_time}'".format(
                        table = mysql_table_weixin_article,
                        user_id = self.user_id,
                        href = self.href,
                        publish_time = self.publish_time
                        )
                engine.execute(sql_del)
            except Exception,e:
                print 'delete error!', str(e)

            df.to_sql(mysql_table_weixin_article, engine, if_exists='append', index=False)
            return True